In [1]:
#!pip install cloudscraper

In [5]:
import os
from io import StringIO

import cloudscraper
import pandas as pd

In [6]:
def scrape_fixtures_table(url, attrs):
    """Download ``url`` and return the first HTML table matching ``attrs``.

    Args:
        url: Address of the page to fetch.
        attrs: Dict of HTML attributes (e.g. ``{"id": "..."}``) forwarded to
            ``pandas.read_html`` to select the table.

    Returns:
        pandas.DataFrame: The first table on the page that matches ``attrs``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If ``pandas.read_html`` finds no matching table.
    """
    # The context manager closes the underlying HTTP session once the page
    # has been fetched instead of leaving it open for every call.
    with cloudscraper.create_scraper() as scraper:
        response = scraper.get(url)
        # Fail loudly on an error page rather than letting the parser report
        # a misleading "no tables found".
        response.raise_for_status()
        html = response.text

    # Passing a literal HTML string to read_html is deprecated; wrapping it in
    # StringIO is the supported way to parse in-memory markup.
    tables = pd.read_html(StringIO(html), attrs=attrs)

    return tables[0]

In [7]:
def clean_and_save_fixtures(df, output_path):
    """Filter fixtures to dates up to today, derive goal columns, and save as CSV.

    The input frame is not modified; all work happens on a new frame.

    Steps:
      1. Parse ``Date`` (unparseable values become ``NaT``) and keep only rows
         dated today or earlier; ``NaT`` rows fail the comparison and are
         dropped.
      2. If a ``Score`` column is present, extract the two integers around the
         score separator into nullable-integer ``Home_Goals`` / ``Away_Goals``
         columns. Rows without a parseable score get ``<NA>``.
      3. Drop the bookkeeping columns that are present and rename the xG and
         team columns.
      4. Write the result to ``output_path`` without the index.

    Args:
        df: DataFrame with at least a ``Date`` column.
        output_path: Destination passed to ``DataFrame.to_csv``. When it is a
            ``str`` or ``os.PathLike``, missing parent directories are created.

    Returns:
        pandas.DataFrame: The cleaned frame that was written to disk.
    """
    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original (unparsed) Date column.
    fixtures = df.assign(Date=pd.to_datetime(df["Date"], errors="coerce"))
    today = pd.Timestamp.today().normalize()
    fixtures = fixtures[fixtures["Date"] <= today].copy()

    if "Score" in fixtures.columns:
        # Two digit groups separated by any run of non-digit, non-space,
        # non-parenthesis characters: this accepts "-", an en dash, or a
        # mis-decoded dash alike, and yields NaN when no score is present.
        goals = fixtures["Score"].astype(str).str.extract(
            r"(\d+)\s*[^\d\s()]+\s*(\d+)"
        )
        fixtures["Home_Goals"] = pd.to_numeric(goals[0], errors="coerce").astype("Int64")
        fixtures["Away_Goals"] = pd.to_numeric(goals[1], errors="coerce").astype("Int64")

    drop_cols = ["Wk", "Day", "Time", "Score", "Attendance", "Venue",
                 "Referee", "Match Report", "Notes"]
    fixtures = fixtures.drop(columns=[c for c in drop_cols if c in fixtures.columns])

    # NOTE(review): "Away_team" is cased differently from "Home_Team"; kept
    # as-is so previously written CSVs and their readers stay compatible.
    rename_cols = {"xG": "Home_xG", "xG.1": "Away_xG", "Home": "Home_Team", "Away": "Away_team"}
    fixtures = fixtures.rename(columns=rename_cols)

    # Create the target directory for path-like destinations so a fresh
    # checkout does not fail on the first write.
    if isinstance(output_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    fixtures.to_csv(output_path, index=False)
    return fixtures

In [8]:
# Season labels as they appear in the schedule URLs and table ids.
seasons = [
    "2020-2021",
    "2021-2022",
    "2022-2023",
    "2023-2024",
    "2024-2025",
    "2025-2026",
]

# One (season, schedule page URL, HTML table id) triple per season; the URL
# and the table id only differ by the season label.
fixtures = [
    (
        label,
        f"https://fbref.com/en/comps/9/{label}/schedule/{label}-Premier-League-Scores-and-Fixtures",
        f"sched_{label}_9_1",
    )
    for label in seasons
]

# Scrape each season's schedule table, clean it, and write one CSV per season.
for season, url, table_id in fixtures:
    attrs = dict(id=table_id)
    df = scrape_fixtures_table(url, attrs)

    # File name is the season label with the dash removed, e.g. 20202021.
    output_path = "../CSV_files/" + season.replace("-", "") + "_fixtures.csv"

    clean_and_save_fixtures(df, output_path)
    print("Saved:", output_path)

  tables = pd.read_html(html, attrs=attrs)


Saved: ../CSV_files/20202021_fixtures.csv


  tables = pd.read_html(html, attrs=attrs)


Saved: ../CSV_files/20212022_fixtures.csv


  tables = pd.read_html(html, attrs=attrs)


Saved: ../CSV_files/20222023_fixtures.csv


  tables = pd.read_html(html, attrs=attrs)


Saved: ../CSV_files/20232024_fixtures.csv


  tables = pd.read_html(html, attrs=attrs)


Saved: ../CSV_files/20242025_fixtures.csv
Saved: ../CSV_files/20252026_fixtures.csv


  tables = pd.read_html(html, attrs=attrs)
