# Scraping IFSC data

In [129]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from urllib.parse import urlparse, parse_qs
import pandas as pd

In [95]:
def link_generator(original_link):
    """Build the "result-complete" component URL for a results link.

    The ``event`` and ``result`` query parameters are read from
    ``original_link``, converted to integers, and substituted into the
    ``components.ifsc-climbing.org/result-complete/`` URL.

    Args:
        original_link: URL whose query string carries ``event`` and ``result``.

    Returns:
        The generated URL on success. On failure no exception is raised;
        instead a string starting with ``'Error: '`` is returned, either the
        missing-parameter message or the text of the caught exception.
    """
    try:
        params = parse_qs(urlparse(original_link).query)

        # parse_qs maps each key to a list of values; only the first is used.
        raw_event = params.get('event', [None])[0]
        raw_result = params.get('result', [None])[0]

        # Guard clause: both identifiers are required to build the link.
        if raw_event is None or raw_result is None:
            return 'Error: Missing required parameters (event or result) in the URL.'

        # int() both validates the values and normalises their formatting.
        event_id = int(raw_event)
        result_id = int(raw_result)
        return (
            'https://components.ifsc-climbing.org/result-complete/'
            f'?event={event_id}&result={result_id}'
        )
    except Exception as err:
        # Any failure (e.g. a non-numeric id) is reported as text, not raised.
        return f'Error: {err}'

In [130]:
# Start a Chrome session in incognito mode and open the IFSC calendar page.
options = Options()
# Headless mode is left disabled, so a visible browser window is opened.
# options.add_argument('--headless')
options.add_argument('--incognito')
driver = webdriver.Chrome(options=options)


driver.get("https://components.ifsc-climbing.org/calendar/")
# Implicit wait (in seconds) that Selenium applies to later element lookups
# made through this driver.
driver.implicitly_wait(0.5)

In [131]:
# Wrap the element with id "yearSelect" in Selenium's Select helper and pick
# the option whose visible text is '2023'.
menu = Select(driver.find_element(By.ID, "yearSelect"))
menu.select_by_visible_text('2023')

In [133]:
# Keep a handle on the raw "yearSelect" element so its options can be inspected.
dropdown = driver.find_element(By.ID, "yearSelect")

In [138]:
# Collect every <option> inside the year dropdown.
values = dropdown.find_elements(By.CSS_SELECTOR, 'option')

# Print the visible text of each option, one per line.
for year_option in values:
    print(year_option.text)


2024
2023
2022
2021
2020
2019
2018
2017
2016
2015
2014
2013
2012
2011
2010
2009
2008
2007
2006
2005
2004
2003
2002
2001
2000
1999
1998
1997
1996
1995
1994
1993
1992
1991
1990


In [98]:
# All elements carrying the "competition" class on the currently loaded page.
competitions = driver.find_elements(By.CLASS_NAME, 'competition')

# Preview only the first competition (if any): its title, its date, and one
# "<tag text>:<generated link>" line per tag element.
for competition in competitions[:1]:
    print(competition.find_element(By.CLASS_NAME, 'title').text)
    print(competition.find_element(By.CLASS_NAME, 'date').text)

    # Each tag wraps an <a> whose href is rewritten by link_generator.
    for tag in competition.find_elements(By.CLASS_NAME, 'tag'):
        results_href = tag.find_element(By.TAG_NAME, 'a').get_attribute("href")
        print(f"{tag.text}:{link_generator(results_href)}")
            

IFSC World Cup Hachioji 2023
21 April - 23 April 2023
BOULDER Men:https://components.ifsc-climbing.org/result-complete/?event=1291&result=3
BOULDER Women:https://components.ifsc-climbing.org/result-complete/?event=1291&result=7


In [107]:
# Start a new incognito Chrome session and open one result page directly.
# NOTE(review): this rebinds `driver`; this cell does not quit any browser
# that `driver` referred to before.
options = Options()
# Headless mode is left disabled, so a visible browser window is opened.
# options.add_argument('--headless')
options.add_argument('--incognito')
driver = webdriver.Chrome(options=options)


driver.get("https://components.ifsc-climbing.org/result-complete/?event=1291&result=3")
# Implicit wait (in seconds) that Selenium applies to later element lookups
# made through this driver.
driver.implicitly_wait(0.5)

In [108]:
# Grab the first element matching the CSS selector 'table' on the loaded page.
table = driver.find_element(By.CSS_SELECTOR, 'table')

In [124]:
# Scrape the first <table> of a page into a pandas DataFrame and export it
# to CSV. The browser is closed in a `finally` clause so that a failed
# lookup (e.g. the page has no <table> or <thead>) does not leave a Chrome
# process running.

# Set up a web driver (replace with the appropriate driver for your browser)
driver = webdriver.Chrome()

# Example URL (replace with the actual URL)
url = 'https://example.com'

# Initialize empty lists to store table data; they are filled in place so
# they remain defined (possibly partially filled) if scraping fails midway.
table_header = []
table_data = []

try:
    # Navigate to the webpage
    driver.get(url)

    # Find the table element (replace with the appropriate selector for your table)
    table = driver.find_element(By.CSS_SELECTOR, 'table')

    # Header texts come from the <th> cells of the first row inside <thead>
    thead = table.find_element(By.CSS_SELECTOR, 'thead')
    header_row = thead.find_element(By.CSS_SELECTOR, 'tr')
    table_header.extend(
        cell.text for cell in header_row.find_elements(By.CSS_SELECTOR, 'th')
    )

    # One list of <td> texts per <tr> inside <tbody>
    tbody = table.find_element(By.CSS_SELECTOR, 'tbody')
    table_data.extend(
        [cell.text for cell in row.find_elements(By.CSS_SELECTOR, 'td')]
        for row in tbody.find_elements(By.CSS_SELECTOR, 'tr')
    )
finally:
    # Close the browser whether or not scraping succeeded
    driver.quit()

# Create a Pandas DataFrame from the table data
df = pd.DataFrame(table_data, columns=table_header)

# Print the scraped table header and data using Pandas DataFrame
print("Table Header:")
print(df.columns)
print("Table Data:")
print(df)

# Export the DataFrame to a CSV file
csv_file_path = 'table_data_pandas.csv'
df.to_csv(csv_file_path, index=False, encoding='utf-8')



In [112]:
# Echo the scraped rows as the cell's output for inspection.
table_data

[<selenium.webdriver.remote.webelement.WebElement (session="3c56a43cd41076b1634d1ed18005178e", element="172029010631E4DFC2073E4A0529EF77_element_745")>,
 [],
 ['1', 'MEJDI', 'SCHALCK', 'FRA', '4t4z 6 6', '1t4z 2 9', '2t3z 7 7'],
 ['2', 'HANNES', 'VAN DUYSEN', 'BEL', '3t5z 9 15', '1t4z 8 14', '1t3z 2 11'],
 ['3', 'PAUL', 'JENFT', 'FRA', '4t5z 10 9', '2t4z 11 13', '1t3z 3 3'],
 ['4', 'KOKORO', 'FUJII', 'JPN', '4t5z 8 17', '1t2z 3 4', '0t3z 0 9'],
 ['5', 'SORATO', 'ANRAKU', 'JPN', '3t5z 4 17', '1t4z 1 12', '0t3z 0 11'],
 ['6', 'JONGWON', 'CHON', 'KOR', '3t4z 3 10', '2t2z 6 5', '0t3z 0 13'],
 ['7', 'DOHYUN', 'LEE', 'KOR', '5t5z 10 10', '1t2z 5 5', ''],
 ['8', 'MEICHI', 'NARASAKI', 'JPN', '3t4z 5 10', '0t4z 0 10', ''],
 ['9', 'EDVARDS', 'GRUZITIS', 'LAT', '4t5z 18 12', '0t4z 0 11', ''],
 ['10', 'TOMOA', 'NARASAKI', 'JPN', '5t5z 20 13', '0t4z 0 12', ''],
 ['11', 'YANNICK', 'FLOHÉ', 'GER', '4t5z 12 14', '0t3z 0 9', ''],
 ['12', 'LEO', 'AVEZOU', 'FRA', '4t5z 13 10', '0t3z 0 14', ''],
 ['13', '

In [139]:
# Shut down the browser session currently bound to `driver`.
driver.quit()