In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_table(url, table_class, timeout=10):
    """Download a page and convert one HTML table into a DataFrame.

    The first ``<tr>`` inside the table's ``<tbody>`` is treated as the header
    row. Every later row whose ``<td>`` count equals the header count becomes
    a data row; rows with a different cell count are skipped with a message.

    Args:
        url: Address of the page to download.
        table_class: Value passed to BeautifulSoup's ``class_`` filter to
            locate the ``<table>`` element.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``pandas.DataFrame`` of stripped cell text, or ``None`` when the
        request fails, the status code is not 200, the table cannot be found,
        or the table body contains no rows.
    """
    # Without a timeout, requests may block forever on an unresponsive server.
    # Network errors follow the same "print and return None" convention as the
    # other failure paths instead of aborting the caller with a traceback.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the web page. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve the web page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', class_=table_class)
    if table is None:
        print(f"Couldn't find the table with class {table_class}.")
        return None

    # Collect the body rows once and reuse them for headers and data.
    tbody = table.find('tbody')
    body_rows = tbody.find_all('tr') if tbody is not None else []
    if not body_rows:
        print("No rows found in the table body.")
        return None

    # The first body row is assumed to hold the column names.
    headers = [cell.text.strip() for cell in body_rows[0].find_all('td')]

    rows = []
    for row in body_rows[1:]:
        row_data = [cell.text.strip() for cell in row.find_all('td')]

        if len(row_data) == len(headers):
            rows.append(row_data)
        else:
            print(f"Skipping row with mismatched columns: expected {len(headers)}, found {len(row_data)}")

    return pd.DataFrame(rows, columns=headers)

# Page to scrape
url = 'https://footballdatabase.com/ranking/england/1'

# Class attribute used to locate the table; adjust it if the page markup differs
table_class = 'table table-hover'

# Download the page and parse the table
df = scrape_table(url, table_class)

# Write the CSV only when the scrape produced at least one row
if df is None or df.empty:
    print("Data could not be scraped.")
else:
    file_path = 'premier_league_overall_stats_EE.csv'
    df.to_csv(file_path, index=False)
    print(f"Data scraped and saved to {file_path}")


Skipping row with mismatched columns: expected 4, found 1
Data scraped and saved to premier_league_overall_stats_EE.csv


In [23]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_table(url, table_class, timeout=10):
    """Download a page and convert one HTML table into a DataFrame.

    Column names come from the first ``<tr>`` of ``<thead>`` when present,
    otherwise from the first ``<tr>`` of the table body. Data rows are the
    remaining body rows that contain at least one ``<td>``.

    Args:
        url: Address of the page to download.
        table_class: Value passed to BeautifulSoup's ``class_`` filter to
            locate the ``<table>`` element.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``pandas.DataFrame`` of stripped cell text, or ``None`` when the
        request fails, the status code is not 200, the table cannot be found,
        or no header row exists.
    """
    # Without a timeout, requests may block forever on an unresponsive server.
    # Network errors follow the same "print and return None" convention as the
    # other failure paths, so one bad page does not abort a multi-page scrape.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the web page. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve the web page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', class_=table_class)
    if table is None:
        print(f"Couldn't find the table with class {table_class}.")
        return None

    thead = table.find('thead')
    tbody = table.find('tbody')
    # Markup may omit <tbody> and html.parser does not add one, so fall back
    # to searching the table element itself.
    body = tbody if tbody is not None else table

    # Prefer the <thead> row; otherwise use the first body row as the header.
    header_row = thead.find('tr') if thead is not None else None
    if header_row is None:
        header_row = body.find('tr')
    if header_row is None:
        print("No header row found in the table.")
        return None

    headers = [cell.text.strip() for cell in header_row.find_all(['th', 'td'])]

    rows = []
    for row in body.find_all('tr'):
        # Identity check: the header row must not be emitted again as data.
        if row is header_row:
            continue

        cells = row.find_all('td')
        if not cells:
            continue  # Skip rows without data cells

        row_data = [cell.text.strip() for cell in cells]
        if len(row_data) > len(headers):
            # pandas raises when a row is wider than the column list.
            print(f"Skipping row with mismatched columns: expected {len(headers)}, found {len(row_data)}")
            continue

        # Pad short rows explicitly so construction never depends on at least
        # one full-width row being present.
        row_data.extend([None] * (len(headers) - len(row_data)))
        rows.append(row_data)

    return pd.DataFrame(rows, columns=headers)

def scrape_multiple_pages(base_url, table_class, total_pages):
    """Scrape consecutively numbered pages and stack their tables.

    Pages are requested as ``f"{base_url}{page}"`` for ``page`` running from
    1 through ``total_pages`` inclusive.

    Args:
        base_url: URL prefix to which the page number is appended.
        table_class: Table class forwarded to ``scrape_table``.
        total_pages: Number of pages to request.

    Returns:
        One ``pandas.DataFrame`` with the rows of every successfully scraped
        page and a fresh integer index. An empty DataFrame is returned when
        no page produced data.
    """
    page_frames = []

    for page in range(1, total_pages + 1):
        url = f"{base_url}{page}"
        print(f"Scraping {url}")

        df = scrape_table(url, table_class)
        if df is not None and not df.empty:
            page_frames.append(df)
        else:
            print(f"Data could not be scraped for page {page}.")

    # pd.concat raises ValueError on an empty list; return an empty frame so
    # callers can test `.empty` instead of crashing.
    if not page_frames:
        return pd.DataFrame()

    return pd.concat(page_frames, ignore_index=True)

# URL prefix (the page number is appended to it) and the table's class attribute
base_url = 'https://footballdatabase.com/ranking/europe/'
table_class = 'table table-hover'

# Fetch pages 1 through 5 and stack them into a single frame
combined_df = scrape_multiple_pages(base_url, table_class, total_pages=5)

# Write the CSV only when at least one row was collected
if combined_df.empty:
    print("No data was scraped.")
else:
    file_path = 'combined_european_league_stats_fixed.csv'
    combined_df.to_csv(file_path, index=False)
    print(f"All data scraped and saved to {file_path}")


Scraping https://footballdatabase.com/ranking/europe/1
Scraping https://footballdatabase.com/ranking/europe/2
Scraping https://footballdatabase.com/ranking/europe/3
Scraping https://footballdatabase.com/ranking/europe/4
Scraping https://footballdatabase.com/ranking/europe/5
All data scraped and saved to combined_european_league_stats_fixed.csv
