# In [5]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

class WebScraper:
    """Scrape HTML tables from a web page and from the pages it links to.

    The page at ``start_url`` is downloaded as soon as the instance is
    created. The methods treat the first ``<table>`` on a page as the list
    of links to other pages and the second ``<table>`` as the data table.

    Attributes:
        start_url: URL of the page currently loaded (or last attempted).
        timeout: Seconds to wait for the server before giving up.
        soup: Parsed document of the current page, or ``None`` if the last
            fetch failed.
        tables: All ``<table>`` elements of the current page; empty if the
            last fetch failed.
    """

    # Without a timeout ``requests`` waits forever on an unresponsive server.
    DEFAULT_TIMEOUT = 30

    def __init__(self, start_url, timeout=DEFAULT_TIMEOUT):
        """Store the configuration and fetch ``start_url`` immediately.

        Args:
            start_url: URL of the first page to download.
            timeout: Request timeout in seconds.
        """
        self.start_url = start_url
        self.timeout = timeout
        self.soup = None
        self.tables = []
        self.fetch_data()

    def fetch_data(self):
        """Download ``self.start_url`` and cache its parsed tables.

        A failed request is reported with ``print`` instead of raising; in
        that case ``soup`` is ``None`` and ``tables`` is empty afterwards.
        """
        # Clear the previous page first so that a failed request can never
        # leave stale tables behind to be mistaken for the new page's data.
        self.soup = None
        self.tables = []
        try:
            response = requests.get(self.start_url, timeout=self.timeout)
            # Turn 4xx/5xx responses into exceptions.
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching URL: {e}")
        else:
            self.soup = BeautifulSoup(response.content, 'html.parser')
            self.tables = self.soup.find_all("table")

    def get_data_table(self):
        """Return the second table of the current page as a DataFrame.

        Only ``<td>`` cells are read, and cells carrying the ``d-sm-none``
        CSS class are skipped (presumably duplicates rendered only for some
        screen sizes -- confirm against the page markup). The first
        non-empty row supplies the column labels.

        Returns:
            A ``pandas.DataFrame`` of the remaining rows, or ``None`` when
            the page has fewer than two tables or the table has no cells.
        """
        # The data lives in the second table; the first one holds page links.
        if len(self.tables) < 2:
            print("Data table not found.")
            return None
        rows_list = []
        for row in self.tables[1].find_all("tr"):
            cols_list = [
                col.text
                for col in row.find_all("td")
                if "d-sm-none" not in col.get("class", [])
            ]
            if cols_list:  # Rows without any usable <td> cell are dropped
                rows_list.append(cols_list)
        if not rows_list:
            print("No data found in the table.")
            return None
        df = pd.DataFrame(rows_list)
        # Promote the first row to the header, then remove it from the body.
        df.columns = df.iloc[0]
        df = df.iloc[1:, :]
        return df

    def get_page_list(self):
        """Return the raw ``href`` of every link in the first table.

        The values are returned exactly as written in the page, so they may
        be relative URLs.

        Returns:
            A list of ``href`` strings, or ``None`` when the page has no
            tables at all.
        """
        if not self.tables:
            print("No tables found.")
            return None
        return [a['href'] for a in self.tables[0].find_all("a", href=True)]

    def iterate_over_pages(self):
        """Fetch every page linked from the current page and print its table.

        Links are resolved against the URL of the page they were found on,
        so relative ``href`` values work. The scraper's state is overwritten
        on each step: afterwards ``start_url``, ``soup`` and ``tables``
        describe the last page visited.
        """
        urls = self.get_page_list()
        if urls is None:
            print("No URLs found for iteration.")
            return
        # Remember the originating page now: ``self.start_url`` is replaced
        # inside the loop, and every link is relative to this page only.
        base_url = self.start_url
        for url in urls:
            self.start_url = urljoin(base_url, url)
            self.fetch_data()
            data_table = self.get_data_table()
            if data_table is not None:
                # Process the data table as needed
                print(data_table)

# In [6]:
# Results page to scrape; creating the scraper downloads it right away.
url = (
    "https://runnersunite.racetecresults.com/results.aspx"
    "?CId=16634&RId=1189&EId=2"
)
km1 = WebScraper(url)

# In [7]:
# Data table of the start page as a DataFrame (None when it is not found).
d = km1.get_data_table()