In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_table(url, table_id, timeout=30):
    """Download a web page and return one of its HTML tables as a DataFrame.

    The table is located by its ``id`` attribute. Column names come from the
    table's ``<th scope="col">`` cells; each ``<tr>`` inside ``<tbody>``
    becomes one row built from its optional ``<th scope="row">`` cell followed
    by its ``<td>`` cells. All values are kept as stripped strings.

    Args:
        url: Address of the page to download.
        table_id: Value of the ``id`` attribute of the wanted ``<table>``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        A ``pandas.DataFrame`` with one row per table row whose cell count
        matches the number of headers, or ``None`` when the page cannot be
        downloaded, the table is not found, or the table has no ``<tbody>``.
        Rows with a different cell count are reported on stdout and skipped.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport errors the same way as every other failure here:
        # print a message and return None.
        print(f"Failed to retrieve the web page: {exc}")
        return None

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the web page. Status code: {response.status_code}")
        return None

    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table by its ID
    table = soup.find('table', id=table_id)
    if table is None:
        print(f"Couldn't find the table with id {table_id}.")
        return None

    # Column names, stripped like the cell values below
    headers = [header.text.strip() for header in table.find_all('th', scope='col')]

    # A table without <tbody> would otherwise fail with AttributeError
    body = table.find('tbody')
    if body is None:
        print(f"Table with id {table_id} has no <tbody> element.")
        return None

    rows = []
    for row in body.find_all('tr'):
        # Row might start with a 'th' element for row header
        row_header = row.find('th', scope='row')

        # Keep the row-header cell whenever the element exists, even if its
        # text is empty; dropping it would leave the row one column short
        # and cause it to be skipped as mismatched.
        row_data = [] if row_header is None else [row_header.text.strip()]
        row_data.extend(cell.text.strip() for cell in row.find_all('td'))

        if len(row_data) == len(headers):
            rows.append(row_data)
        else:
            print(f"Skipping row with mismatched columns: expected {len(headers)}, found {len(row_data)}")

    # Create a DataFrame using the headers and rows
    return pd.DataFrame(rows, columns=headers)

# La Liga: pull two tables from the same competition page, store each as a
# CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/12/La-Liga-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024121_overall', 'La_Liga_overall_stats.csv'),
    ("stats_squads_standard_for", 'La_Liga_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('La_Liga_overall_stats.csv')
df2 = pd.read_csv('La_Liga_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_laliga = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_La_Liga_stats.csv'
merged_df_laliga.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to La_Liga_overall_stats.csv
Data scraped and saved to La_Liga_overall_stats_squad.csv
Merged data saved to merged_La_Liga_stats.csv


In [9]:
# Premier League: pull two tables from the same competition page, store each
# as a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/9/Premier-League-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-202491_overall', 'premier_league_overall_stats.csv'),
    ("stats_squads_standard_for", 'premier_league_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('premier_league_overall_stats.csv')
df2 = pd.read_csv('premier_league_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_premier = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_premier_league_stats.csv'
merged_df_premier.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")


Data scraped and saved to premier_league_overall_stats.csv
Data scraped and saved to premier_league_overall_stats_squad.csv
Merged data saved to merged_premier_league_stats.csv


In [10]:
# Serie A: pull two tables from the same competition page, store each as a
# CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/11/Serie-A-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024111_overall', 'SerieA_overall_stats.csv'),
    ("stats_squads_standard_for", 'SerieA_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('SerieA_overall_stats.csv')
df2 = pd.read_csv('SerieA_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_SerieA = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_SerieA_stats.csv'
merged_df_SerieA.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to SerieA_overall_stats.csv
Data scraped and saved to SerieA_overall_stats_squad.csv
Merged data saved to merged_SerieA_stats.csv


In [11]:
# Bundesliga: pull two tables from the same competition page, store each as
# a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/20/Bundesliga-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024201_overall', 'Bundesliga_overall_stats.csv'),
    ("stats_squads_standard_for", 'Bundesliga_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Bundesliga_overall_stats.csv')
df2 = pd.read_csv('Bundesliga_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Bundesliga = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Bundesliga_stats.csv'
merged_df_Bundesliga.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to Bundesliga_overall_stats.csv
Data scraped and saved to Bundesliga_overall_stats_squad.csv
Merged data saved to merged_Bundesliga_stats.csv


In [12]:
# Ligue 1: pull two tables from the same competition page, store each as a
# CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/13/Ligue-1-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024131_overall', 'Ligue1_overall_stats.csv'),
    ("stats_squads_standard_for", 'Ligue1_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Ligue1_overall_stats.csv')
df2 = pd.read_csv('Ligue1_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Ligue1 = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Ligue1_stats.csv'
merged_df_Ligue1.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to Ligue1_overall_stats.csv
Data scraped and saved to Ligue1_overall_stats_squad.csv
Merged data saved to merged_Ligue1_stats.csv


In [13]:
# Eredivisie: pull two tables from the same competition page, store each as
# a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/23/Eredivisie-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024231_overall', 'Eredivisie_overall_stats.csv'),
    ("stats_squads_standard_for", 'Eredivisie_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Eredivisie_overall_stats.csv')
df2 = pd.read_csv('Eredivisie_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Eredivisie = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Eredivisie_stats.csv'
merged_df_Eredivisie.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to Eredivisie_overall_stats.csv
Data scraped and saved to Eredivisie_overall_stats_squad.csv
Merged data saved to merged_Eredivisie_stats.csv


In [14]:
# Primeira Liga: pull two tables from the same competition page, store each
# as a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/32/Primeira-Liga-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024321_overall', 'Primeira_overall_stats.csv'),
    ("stats_squads_standard_for", 'Primeira_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Primeira_overall_stats.csv')
df2 = pd.read_csv('Primeira_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Primeira = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Primeira_stats.csv'
merged_df_Primeira.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to Primeira_overall_stats.csv
Data scraped and saved to Primeira_overall_stats_squad.csv
Merged data saved to merged_Primeira_stats.csv


In [15]:
# Belgian Pro League: pull two tables from the same competition page, store
# each as a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/37/Belgian-Pro-League-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024370_overall', 'Belgian_overall_stats.csv'),
    ("stats_squads_standard_for", 'Belgian_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Belgian_overall_stats.csv')
df2 = pd.read_csv('Belgian_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Belgian = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Belgian_stats.csv'
merged_df_Belgian.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Data scraped and saved to Belgian_overall_stats.csv
Data scraped and saved to Belgian_overall_stats_squad.csv
Merged data saved to merged_Belgian_stats.csv


In [17]:
# Scottish Premiership: pull two tables from the same competition page,
# store each as a CSV file, then join the two files on the squad name.
url = 'https://fbref.com/en/comps/40/Scottish-Premiership-Stats'

# (table id on the page, CSV file to write) -- the ids must match the page
for table_id, csv_name in (
    ('results2023-2024401_overall', 'Scottish_overall_stats.csv'),
    ("stats_squads_standard_for", 'Scottish_overall_stats_squad.csv'),
):
    df = scrape_table(url, table_id)

    # A failed scrape (None) and a table without rows are both failures
    if df is None or df.empty:
        print("Data could not be scraped.")
    else:
        file_path = csv_name
        df.to_csv(file_path, index=False)
        print(f"Data scraped and saved to {file_path}")


# pandas is already imported at the top of the notebook; this is a no-op
import pandas as pd

# The merge reads the saved CSV files back rather than using the in-memory
# frames, so it also runs against files left by an earlier run when a
# scrape above fails.
df1 = pd.read_csv('Scottish_overall_stats.csv')
df2 = pd.read_csv('Scottish_overall_stats_squad.csv')

# Join the two tables on their shared 'Squad' column
merged_df_Scottish = df1.merge(df2, on='Squad')

# Persist the joined table
merged_file_path = 'merged_Scottish_stats.csv'
merged_df_Scottish.to_csv(merged_file_path, index=False)

print(f"Merged data saved to {merged_file_path}")

Skipping row with mismatched columns: expected 13, found 12
Data scraped and saved to Scottish_overall_stats.csv
Data scraped and saved to Scottish_overall_stats_squad.csv
Merged data saved to merged_Scottish_stats.csv


In [21]:
# Stack the per-league merged frames on top of each other; ignore_index
# gives the result a fresh 0..n-1 index instead of repeating each frame's own.
league_frames = [
    merged_df_laliga,
    merged_df_premier,
    merged_df_SerieA,
    merged_df_Bundesliga,
    merged_df_Ligue1,
    merged_df_Eredivisie,
    merged_df_Primeira,
    merged_df_Belgian,
    merged_df_Scottish,
]
combined_df = pd.concat(league_frames, ignore_index=True)

# Write the stacked table to disk
combined_file_path = 'combined_football_league_stats.csv'
combined_df.to_csv(combined_file_path, index=False)

# Bare last expression so the notebook displays the output path
combined_file_path


'combined_football_league_stats.csv'