In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

In [2]:
# URL of the POST request
post_url = 'https://results.ridelondon.co.uk/2024/?pid=list&pidp=start'

# Base URL for the GET requests
base_url = 'https://results.ridelondon.co.uk/2024/'

# Form data to be sent with the POST request
form_data = {
    'lang': 'EN_CAP',
    'startpage': 'start_responsive',
    'startpage_type': 'lists',
    'event': 'I',
    'search[sex]': 'M',
    'num_results': '100',
    'submit': ''
}

# Sending the POST request with the form data
response = requests.post(post_url, data=form_data)

# Final DataFrame to hold the concatenated data from all pages
final_df = pd.DataFrame()

# Check if the request was successful
if response.status_code == 200:
    # Parse the response content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all parent elements with class 'type-fullname' and extract hrefs
    hrefs = []
    for element in soup.find_all(class_='type-fullname'):
        # Find all anchor tags inside the parent element
        a_tag = element.find('a')
        if a_tag and a_tag.has_attr('href'):
            hrefs.append(a_tag['href'])

    hrefs = [hrefs[0]]
    
    # Iterate through all the hrefs and send a GET request to each concatenated URL
    for href in hrefs:
        full_url = base_url + href
        get_response = requests.get(full_url)
        
        if get_response.status_code == 200:
            # Parse the HTML content from the GET response
            get_soup = BeautifulSoup(get_response.text, 'html.parser')
            
            # Initialize a dictionary to store the pivoted data for this page
            page_data = {}
            pivoted_tables = []
            
            # Find the specific classes and extract the tables within those classes
            target_classes = ['box-general', 'box-totals', 'box-state']
            for target_class in target_classes:
                # Find the parent element by class
                box = get_soup.find(class_=target_class)
                
                if box:
                    # Extract tables within this parent element
                    html_string = str(box)
                    tables = pd.read_html(StringIO(html_string))
                    
                    for table in tables:
                        # Pivoting the DataFrame
                        pivoted_df = table.set_index(0).T
                        
                        # Renaming the columns to match the first row's values
                        pivoted_df.columns.name = None

                        pivoted_tables.append(pivoted_df)

            split_html = get_soup.find(class_='box-splits')
            html_string = str(split_html)
            table = pd.read_html(StringIO(html_string))[0]

            # Create a flattened dictionary for the new row format
            flattened_data = {}
            
            # Custom labels for columns
            labels = ['arr25', 'dep26', 'arr53', 'dep54', 'arr73', 'dep74', 'finish']
            
            # Iterate over the DataFrame rows and add to the dictionary
            for i, label in enumerate(labels):
                flattened_data[f'{label}_tod'] = table.loc[i, 'Time Of Day']
                flattened_data[f'{label}_time'] = table.loc[i, 'Time']
                flattened_data[f'{label}_diff'] = table.loc[i, 'diff.']
                flattened_data[f'{label}_mph'] = table.loc[i, 'mph']
            
            # Convert the flattened data dictionary back into a DataFrame with one row
            split_df = pd.DataFrame([flattened_data])

            pivoted_tables.append(split_df)

            concat_frame = pd.concat(pivoted_tables, ignore_index=False)
            # print(concat_frame)
            
else:
    print(f"POST request failed with status code: {response.status_code}")


concat_frame
# Display the final concatenated DataFrame
# print("Final DataFrame with one row per page:")
# final_df.to_csv('final_table.csv')


Unnamed: 0,Name,Rider number,Charity,Event,Finish,Status,Last Timing Point,arr25_tod,arr25_time,arr25_diff,...,arr73_diff,arr73_mph,dep74_tod,dep74_time,dep74_diff,dep74_mph,finish_tod,finish_time,finish_diff,finish_mph
0,Patrick A Brady (GBR),120305.0,,Ford RideLondon-Essex 100,,,,,,,...,,,,,,,,,,
1,,,,,07:06:58,,,,,,...,,,,,,,,,,
2,,,,,,Finished,Finish,,,,...,,,,,,,,,,
3,,,,,,,,09:40:13,01:40:49,01:40:49,...,01:12:25,15.76,13:41:52,05:42:29,17:47,3.35,16:00:56,08:01:32,02:19:03,12.63
