In [None]:
# Import required libraries
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [None]:
# Address of the event results page that this notebook scrapes
EVENT_URL = 'https://resultscui.active.com/events/adidas10KParis2024'

# Start a Selenium-controlled Chrome session
driver = webdriver.Chrome()
# Enlarge the browser window to full size for better visibility
driver.maximize_window()
# Navigate to the event results page
driver.get(EVENT_URL)

In [None]:
# Grab the element that holds the event page's main content, looked up by its class name
container = driver.find_element(By.CLASS_NAME, 'event-home__content')

In [None]:
# Keep clicking the "Load more" link until it no longer exists, i.e. every result is on the page.

# XPath of the "Load more" link. NOTE: because it starts with '//', the search covers the
# whole document even though it is issued from `container`.
LOAD_MORE_XPATH = '//a[contains(@class, "view-more-list__view-more-link uppercase")]'
# Seconds to pause after each click so the next batch of results can load
LOAD_WAIT_SECONDS = 5

while True:
    try:
        tag_load_more = container.find_element(by=By.XPATH, value=LOAD_MORE_XPATH)
    except NoSuchElementException:
        # Only a missing link means we are done. Any other failure (stale container,
        # lost browser session, ...) propagates instead of being reported as success,
        # which would silently leave the result list incomplete.
        print('All results have been loaded')
        break
    # Link found: click it to load more content, then give the page time to update
    tag_load_more.click()
    time.sleep(LOAD_WAIT_SECONDS)

# Extract the details of every result item

In [None]:
# Number of text lines a complete result row must have (one per column of the table built from `lst`)
EXPECTED_LINE_COUNT = 9

# Extracted data: one list of text lines per result item
lst = []
# Find all elements with class name 'event-home__item'
items = container.find_elements(by=By.CLASS_NAME, value='event-home__item')

for item in items:
    # Visible text of the element, trimmed and split into one entry per line
    lines = item.text.strip().split("\n")

    # Short rows get placeholders for the two trailing gap fields
    # (presumably the first-ranked row, which shows no gap time -- verify against the page)
    if len(lines) < EXPECTED_LINE_COUNT:
        lines.extend(['', 'Gap'])

    lst.append(lines)

# Surface rows that still do not have the expected shape instead of letting them
# misalign columns (or fail) later when the DataFrame is built
malformed = [(pos, len(row)) for pos, row in enumerate(lst) if len(row) != EXPECTED_LINE_COUNT]
if malformed:
    print(f'WARNING: {len(malformed)} item(s) do not have {EXPECTED_LINE_COUNT} fields; '
          f'first (position, field count) pairs: {malformed[:5]}')

# Show a short summary and preview rather than dumping every extracted row
print(f'Extracted {len(lst)} items')
lst[:5]

In [None]:
# Column labels, in the same order as the text lines collected for each result item
columns = [
    'Rank', 'Bib', 'Bib_name', 'Name', 'gender',
    'finish_time', 'finish_text', 'gap_time', 'gap_text',
]

# Build the table: every inner list of `lst` becomes one row
df = pd.DataFrame(data=lst, columns=columns)
df.head()

Unnamed: 0,Rank,Bib,Bib_name,Name,gender,finish_time,finish_text,gap_time,gap_text
0,1,2,Bib,Hassan CHAHDI,M | Age 35,00:29:37,Finish,,Gap
1,2,1,Bib,Florian CARVALHO,M | Age 35,00:29:43,Finish,+ 00:00:06,Gap
2,3,18,Bib,Yohan DURAND,M | Age 39,00:29:44,Finish,+ 00:00:07,Gap
3,4,128,Bib,Youssef KHADIRI,M | Age 30,00:29:54,Finish,+ 00:00:17,Gap
4,5,50,Bib,Dragos-Luca POP,M | Age 19,00:29:55,Finish,+ 00:00:18,Gap


In [None]:
# Split the 'gender' column (e.g. "M | Age 35") into two new columns: 'Gender' and 'Age'.
# n=1 splits on the first '|' only, so a value can never yield more than two parts.
gender_parts = df['gender'].str.split('|', n=1, expand=True)
# Strip the spaces that surround the '|' separator so the values are clean
# ("M" rather than "M ", "Age 35" rather than " Age 35")
df['Gender'] = gender_parts[0].str.strip()
df['Age'] = gender_parts[1].str.strip()
df.head()

Unnamed: 0,Rank,Bib,Bib_name,Name,gender,finish_time,finish_text,gap_time,gap_text,Gender,Age
0,1,2,Bib,Hassan CHAHDI,M | Age 35,00:29:37,Finish,,Gap,M,Age 35
1,2,1,Bib,Florian CARVALHO,M | Age 35,00:29:43,Finish,+ 00:00:06,Gap,M,Age 35
2,3,18,Bib,Yohan DURAND,M | Age 39,00:29:44,Finish,+ 00:00:07,Gap,M,Age 39
3,4,128,Bib,Youssef KHADIRI,M | Age 30,00:29:54,Finish,+ 00:00:17,Gap,M,Age 30
4,5,50,Bib,Dragos-Luca POP,M | Age 19,00:29:55,Finish,+ 00:00:18,Gap,M,Age 19


In [86]:
# Display the column labels of df (inspection only; nothing is modified)
df.columns

Index(['Rank', 'Bib', 'Bib_name', 'Name', 'gender', 'finish_time',
       'finish_text', 'gap_time', 'gap_text', 'Gender', 'Age'],
      dtype='object')

In [None]:
# Columns kept in the cleaned table, in output order
# (the combined 'gender' column is left out in favour of 'Gender' and 'Age')
final_columns = [
    'Rank', 'Bib', 'Bib_name', 'Name', 'finish_time',
    'finish_text', 'gap_time', 'gap_text', 'Gender', 'Age',
]
final_df = df.loc[:, final_columns]
final_df.head()

Unnamed: 0,Rank,Bib,Bib_name,Name,finish_time,finish_text,gap_time,gap_text,Gender,Age
0,1,2,Bib,Hassan CHAHDI,00:29:37,Finish,,Gap,M,Age 35
1,2,1,Bib,Florian CARVALHO,00:29:43,Finish,+ 00:00:06,Gap,M,Age 35
2,3,18,Bib,Yohan DURAND,00:29:44,Finish,+ 00:00:07,Gap,M,Age 39
3,4,128,Bib,Youssef KHADIRI,00:29:54,Finish,+ 00:00:17,Gap,M,Age 30
4,5,50,Bib,Dragos-Luca POP,00:29:55,Finish,+ 00:00:18,Gap,M,Age 19


In [88]:
# Print a summary of final_df: index range, per-column non-null counts, dtypes and memory usage
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11660 entries, 0 to 11659
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Rank         11660 non-null  object
 1   Bib          11660 non-null  object
 2   Bib_name     11660 non-null  object
 3   Name         11660 non-null  object
 4   finish_time  11660 non-null  object
 5   finish_text  11660 non-null  object
 6   gap_time     11660 non-null  object
 7   gap_text     11660 non-null  object
 8   Gender       11660 non-null  object
 9   Age          11660 non-null  object
dtypes: object(10)
memory usage: 911.1+ KB


In [None]:
# Destination file for the scraped results
output_csv = 'adidas10KParis2024_top11660.csv'
# Write final_df to disk as CSV with pandas' default settings
# (by default the row index is written too, as the first column)
final_df.to_csv(output_csv)