In [2]:
#!pip install cloudscraper

Collecting cloudscraper
  Downloading cloudscraper-1.2.71-py2.py3-none-any.whl.metadata (19 kB)
Downloading cloudscraper-1.2.71-py2.py3-none-any.whl (99 kB)
Installing collected packages: cloudscraper
Successfully installed cloudscraper-1.2.71


In [5]:
import re
import pandas as pd
import cloudscraper


def scrape_fbref_leaderboard(url: str, table_id: str) -> pd.DataFrame:
    scraper = cloudscraper.create_scraper()
    html = scraper.get(url).text

    tables = pd.read_html(html, attrs={"id": table_id})
    if not tables:
        raise ValueError(f"Table with id '{table_id}' wasn't found on the page {url}")

    df = tables[0]

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [
            " ".join([str(c) for c in col if str(c) != ""]).strip()
            for col in df.columns
        ]
    else:
        df.columns = [str(c).strip() for c in df.columns]

    def clean_col(col: str) -> str:
        col = str(col)
        col = col.replace("\xa0", " ")
        col = re.sub(r"[_Â·]+", " ", col)
        col = re.sub(r"\s+", " ", col)
        return col.strip()

    df.columns = [clean_col(c) for c in df.columns]

    cols_to_drop = ["Attendance", "Top Team Scorer", "Goalkeeper", "Notes"]
    existing_to_drop = [c for c in cols_to_drop if c in df.columns]
    if existing_to_drop:
        df = df.drop(columns=existing_to_drop)

    return df


def save_to_csv(df: pd.DataFrame, filename: str, index: bool = False) -> None:
    df.to_csv(filename, index=index)

In [6]:
standings25_26 = scrape_fbref_leaderboard("https://fbref.com/en/comps/9/Premier-League-Stats", "results2025-202691_overall")
standings24_25 = scrape_fbref_leaderboard("https://fbref.com/en/comps/9/2024-2025/2024-2025-Premier-League-Stats", "results2024-202591_overall")
standings23_24 = scrape_fbref_leaderboard("https://fbref.com/en/comps/9/2023-2024/2023-2024-Premier-League-Stats", "results2023-202491_overall")

  tables = pd.read_html(html, attrs={"id": table_id})
  tables = pd.read_html(html, attrs={"id": table_id})
  tables = pd.read_html(html, attrs={"id": table_id})


In [7]:
standings25_26.head()

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Last 5
0,1,Arsenal,11,8,2,1,20,5,15,26,2.36,18.8,6.0,12.8,1.16,W W W W D
1,2,Manchester City,11,7,1,3,23,8,15,22,2.0,19.3,10.3,9.0,0.82,W W L W W
2,3,Chelsea,11,6,2,3,21,11,10,20,1.82,20.4,13.4,6.9,0.63,W W L W W
3,4,Sunderland,11,5,4,2,14,10,4,19,1.73,10.2,13.2,-2.9,-0.27,L W W D D
4,5,Tottenham,11,5,3,3,19,10,9,18,1.64,11.0,15.2,-4.2,-0.38,W L W L D


In [8]:
standings24_25.head()

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90
0,1,Liverpool,38,25,9,4,86,41,45,84,2.21,82.2,38.6,43.6,1.15
1,2,Arsenal,38,20,14,4,69,34,35,74,1.95,59.9,34.4,25.5,0.67
2,3,Manchester City,38,21,8,9,72,44,28,71,1.87,68.1,47.7,20.4,0.54
3,4,Chelsea,38,20,9,9,64,43,21,69,1.82,67.8,47.3,20.5,0.54
4,5,Newcastle Utd,38,20,6,12,68,47,21,66,1.74,63.8,45.5,18.3,0.48


In [9]:
standings23_24.head()

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90
0,1,Manchester City,38,28,7,3,96,34,62,91,2.39,80.5,35.6,44.9,1.18
1,2,Arsenal,38,28,5,5,91,29,62,89,2.34,76.1,27.9,48.2,1.27
2,3,Liverpool,38,24,10,4,86,41,45,82,2.16,87.8,45.7,42.0,1.11
3,4,Aston Villa,38,20,8,10,76,61,15,68,1.79,63.3,59.9,3.4,0.09
4,5,Tottenham,38,20,6,12,74,61,13,66,1.74,68.2,63.4,4.8,0.13


In [12]:
save_to_csv(standings25_26, "../CSV_files/2025-2026 Team Leaderboard (ONGOING).csv")

In [14]:
save_to_csv(standings24_25, "../CSV_files/2024-2025 Team Leaderboard (Finished).csv")

In [15]:
save_to_csv(standings23_24, "../CSV_files/2023-2024 Team Leaderboard (Finished).csv")