In [50]:
import pandas as pd
import time
import random
from tqdm import tqdm
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


In [8]:
# List of all league-season URLs
leagues = {
    '9': 'Premier League',
    '12': 'La Liga',
    '11': 'Serie A',
    '20': 'Bundesliga',
    '13': 'Ligue 1',
    '10': 'Championship',
    '19': 'Major League Soccer',
    '24': 'Brasileirao',
    '31': 'Liga MX',
    '23': 'Eredivisie',
    '32': 'Primeira Liga',
    '39': 'Liga Profesional',
    '37': 'Belgian Pro League',
    '40': 'Serie B'
}

In [75]:
# Seasons
seasons = [f'{year}-{year+1}' for year in range(2013, 2024)]

In [76]:
seasons

['2013-2014',
 '2014-2015',
 '2015-2016',
 '2016-2017',
 '2017-2018',
 '2018-2019',
 '2019-2020',
 '2020-2021',
 '2021-2022',
 '2022-2023',
 '2023-2024']

In [24]:
# URL template: per-player stats
base_url = 'https://fbref.com/en/comps/{code}/{season}/stats/{season}-{league_name}-Stats'
url_list = []

for code, name in leagues.items():
    for season in seasons:
        league_name_url = name.replace(" ", "-").replace(".", "")
        url = base_url.format(code=code, season=season, league_name=league_name_url)
        url_list.append((name, season, url))

In [77]:
base_url = 'https://fbref.com/en/comps/{code}/{season}/stats/{season}-{league_name}-Stats'
last_season = []

for code, name in leagues.items():
        season = '2024-2025'
        league_name_url = name.replace(" ", "-").replace(".", "")
        url = base_url.format(code=code, season=season, league_name=league_name_url)
        last_season.append((name, season, url))

In [82]:
# --- SELENIUM SETUP ---
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


In [62]:
# --- SCRAPING LOOP ---
all_data = []

for league, season, url in tqdm(url_list):
    try:
        print(f"\n🔍 Scraping {league} {season}...")

        driver.get(url)
        time.sleep(random.uniform(5, 7))  # Wait for JS to load

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find('table', {'id': 'stats_standard'})
        if table is None:
            print(f"⚠️ No player stats table found at {url}")
            continue

        df = pd.read_html(str(table), header=[0, 1])[0]
        df.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
        df.columns = [ col.replace("Unnamed: ", "").split(" ", 1)[-1] if col.startswith("Unnamed: ") else col for col in df.columns ]
        df = df[df['Player'].values != 'Player']  # Remove internal header rows
        df['League'] = league
        df['Season'] = season
        all_data.append(df)

        print(f"✅ {len(df)} players scraped.")
        time.sleep(random.uniform(1, 3))

    except Exception as e:
        print(f"❌ Error scraping {league} {season}: {e}")
        time.sleep(random.uniform(3, 6))

# --- CLEANUP ---
driver.quit()

# --- RESULT ---
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    print(f"\n🎉 Done! Collected {combined_df.shape[0]} player-season rows across {len(all_data)} tables.")
else:
    print("🚫 No data was scraped.")

  0%|          | 0/154 [00:00<?, ?it/s]


🔍 Scraping Premier League 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 561 players scraped.


  1%|          | 1/154 [00:09<23:02,  9.03s/it]


🔍 Scraping Premier League 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 549 players scraped.


  1%|▏         | 2/154 [00:20<26:29, 10.46s/it]


🔍 Scraping Premier League 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 561 players scraped.


  2%|▏         | 3/154 [00:30<25:38, 10.19s/it]


🔍 Scraping Premier League 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 543 players scraped.


  3%|▎         | 4/154 [00:40<25:50, 10.33s/it]


🔍 Scraping Premier League 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 529 players scraped.


  3%|▎         | 5/154 [00:52<26:23, 10.63s/it]


🔍 Scraping Premier League 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 508 players scraped.


  4%|▍         | 6/154 [01:04<27:24, 11.11s/it]


🔍 Scraping Premier League 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 522 players scraped.


  5%|▍         | 7/154 [01:17<28:49, 11.76s/it]


🔍 Scraping Premier League 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 532 players scraped.


  5%|▌         | 8/154 [01:28<28:15, 11.61s/it]


🔍 Scraping Premier League 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 546 players scraped.


  6%|▌         | 9/154 [01:40<28:36, 11.84s/it]


🔍 Scraping Premier League 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 569 players scraped.


  6%|▋         | 10/154 [01:52<28:34, 11.90s/it]


🔍 Scraping Premier League 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 580 players scraped.


  7%|▋         | 11/154 [02:03<27:35, 11.58s/it]


🔍 Scraping La Liga 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 542 players scraped.


  8%|▊         | 12/154 [02:16<28:31, 12.05s/it]


🔍 Scraping La Liga 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 527 players scraped.


  8%|▊         | 13/154 [02:27<27:26, 11.68s/it]


🔍 Scraping La Liga 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 546 players scraped.


  9%|▉         | 14/154 [02:41<28:50, 12.36s/it]


🔍 Scraping La Liga 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 555 players scraped.


 10%|▉         | 15/154 [02:55<29:31, 12.74s/it]


🔍 Scraping La Liga 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 573 players scraped.


 10%|█         | 16/154 [03:09<30:03, 13.07s/it]


🔍 Scraping La Liga 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 544 players scraped.


 11%|█         | 17/154 [03:22<30:00, 13.14s/it]


🔍 Scraping La Liga 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 570 players scraped.


 12%|█▏        | 18/154 [03:35<29:57, 13.22s/it]


🔍 Scraping La Liga 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 582 players scraped.


 12%|█▏        | 19/154 [03:50<30:35, 13.60s/it]


🔍 Scraping La Liga 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 616 players scraped.


 13%|█▎        | 20/154 [04:04<30:29, 13.65s/it]


🔍 Scraping La Liga 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 596 players scraped.


 14%|█▎        | 21/154 [04:20<32:18, 14.58s/it]


🔍 Scraping La Liga 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 609 players scraped.


 14%|█▍        | 22/154 [04:33<30:50, 14.02s/it]


🔍 Scraping Serie A 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 611 players scraped.


 15%|█▍        | 23/154 [04:48<31:27, 14.41s/it]


🔍 Scraping Serie A 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 609 players scraped.


 16%|█▌        | 24/154 [05:03<31:06, 14.36s/it]


🔍 Scraping Serie A 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 584 players scraped.


 16%|█▌        | 25/154 [05:14<29:05, 13.53s/it]


🔍 Scraping Serie A 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 582 players scraped.


 17%|█▋        | 26/154 [05:29<29:25, 13.79s/it]


🔍 Scraping Serie A 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 553 players scraped.


 18%|█▊        | 27/154 [05:43<29:51, 14.10s/it]


🔍 Scraping Serie A 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 569 players scraped.


 18%|█▊        | 28/154 [05:57<29:37, 14.11s/it]


🔍 Scraping Serie A 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 600 players scraped.


 19%|█▉        | 29/154 [06:11<29:18, 14.07s/it]


🔍 Scraping Serie A 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 619 players scraped.


 19%|█▉        | 30/154 [06:27<30:00, 14.52s/it]


🔍 Scraping Serie A 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 632 players scraped.


 20%|██        | 31/154 [06:41<29:08, 14.22s/it]


🔍 Scraping Serie A 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 603 players scraped.


 21%|██        | 32/154 [06:51<26:47, 13.18s/it]


🔍 Scraping Serie A 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 616 players scraped.


 21%|██▏       | 33/154 [07:03<25:34, 12.69s/it]


🔍 Scraping Bundesliga 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 476 players scraped.


 22%|██▏       | 34/154 [07:14<24:44, 12.37s/it]


🔍 Scraping Bundesliga 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 462 players scraped.


 23%|██▎       | 35/154 [07:25<23:16, 11.73s/it]


🔍 Scraping Bundesliga 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 492 players scraped.


 23%|██▎       | 36/154 [07:37<23:09, 11.78s/it]


🔍 Scraping Bundesliga 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 475 players scraped.


 24%|██▍       | 37/154 [07:50<24:10, 12.40s/it]


🔍 Scraping Bundesliga 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 484 players scraped.


 25%|██▍       | 38/154 [08:04<24:42, 12.78s/it]


🔍 Scraping Bundesliga 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 474 players scraped.


 25%|██▌       | 39/154 [08:17<24:46, 12.93s/it]


🔍 Scraping Bundesliga 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 498 players scraped.


 26%|██▌       | 40/154 [08:29<24:02, 12.66s/it]


🔍 Scraping Bundesliga 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 505 players scraped.


 27%|██▋       | 41/154 [08:41<23:13, 12.34s/it]


🔍 Scraping Bundesliga 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 523 players scraped.


 27%|██▋       | 42/154 [08:57<24:51, 13.31s/it]


🔍 Scraping Bundesliga 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 515 players scraped.


 28%|██▊       | 43/154 [09:08<23:36, 12.76s/it]


🔍 Scraping Bundesliga 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 507 players scraped.


 29%|██▊       | 44/154 [09:19<22:25, 12.23s/it]


🔍 Scraping Ligue 1 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 560 players scraped.


 29%|██▉       | 45/154 [09:33<23:11, 12.76s/it]


🔍 Scraping Ligue 1 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 545 players scraped.


 30%|██▉       | 46/154 [09:47<23:32, 13.08s/it]


🔍 Scraping Ligue 1 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 588 players scraped.


 31%|███       | 47/154 [10:00<23:24, 13.13s/it]


🔍 Scraping Ligue 1 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 575 players scraped.


 31%|███       | 48/154 [10:13<23:02, 13.04s/it]


🔍 Scraping Ligue 1 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 553 players scraped.


 32%|███▏      | 49/154 [10:27<23:21, 13.35s/it]


🔍 Scraping Ligue 1 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 561 players scraped.


 32%|███▏      | 50/154 [10:42<24:12, 13.97s/it]


🔍 Scraping Ligue 1 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 542 players scraped.


 33%|███▎      | 51/154 [10:55<23:12, 13.52s/it]


🔍 Scraping Ligue 1 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 584 players scraped.


 34%|███▍      | 52/154 [11:08<22:33, 13.27s/it]


🔍 Scraping Ligue 1 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 604 players scraped.


 34%|███▍      | 53/154 [11:20<22:08, 13.15s/it]


🔍 Scraping Ligue 1 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 606 players scraped.


 35%|███▌      | 54/154 [11:33<21:49, 13.10s/it]


🔍 Scraping Ligue 1 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 540 players scraped.


 36%|███▌      | 55/154 [11:47<21:39, 13.13s/it]


🔍 Scraping Championship 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 779 players scraped.


 36%|███▋      | 56/154 [12:00<21:19, 13.06s/it]


🔍 Scraping Championship 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 820 players scraped.


 37%|███▋      | 57/154 [12:13<21:31, 13.31s/it]


🔍 Scraping Championship 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 738 players scraped.


 38%|███▊      | 58/154 [12:27<21:29, 13.43s/it]


🔍 Scraping Championship 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 749 players scraped.


 38%|███▊      | 59/154 [12:42<21:48, 13.77s/it]


🔍 Scraping Championship 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 730 players scraped.


 39%|███▉      | 60/154 [12:55<21:28, 13.71s/it]


🔍 Scraping Championship 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 708 players scraped.


 40%|███▉      | 61/154 [13:10<21:56, 14.16s/it]


🔍 Scraping Championship 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 721 players scraped.


 40%|████      | 62/154 [13:26<22:25, 14.62s/it]


🔍 Scraping Championship 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 736 players scraped.


 41%|████      | 63/154 [13:42<22:41, 14.96s/it]


🔍 Scraping Championship 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 746 players scraped.


 42%|████▏     | 64/154 [13:55<21:40, 14.45s/it]


🔍 Scraping Championship 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 750 players scraped.


 42%|████▏     | 65/154 [14:08<20:32, 13.85s/it]


🔍 Scraping Championship 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 735 players scraped.


 43%|████▎     | 66/154 [14:20<19:37, 13.38s/it]


🔍 Scraping Major League Soccer 2013-2014...


 44%|████▎     | 67/154 [14:32<18:44, 12.92s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2013-2014/stats/2013-2014-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2014-2015...


 44%|████▍     | 68/154 [14:44<18:03, 12.60s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2014-2015/stats/2014-2015-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2015-2016...


 45%|████▍     | 69/154 [14:55<17:18, 12.22s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2015-2016/stats/2015-2016-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2016-2017...


 45%|████▌     | 70/154 [15:07<17:09, 12.26s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2016-2017/stats/2016-2017-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2017-2018...


 46%|████▌     | 71/154 [15:20<17:12, 12.44s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2017-2018/stats/2017-2018-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2018-2019...


 47%|████▋     | 72/154 [15:34<17:32, 12.83s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2018-2019/stats/2018-2019-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2019-2020...


 47%|████▋     | 73/154 [15:47<17:22, 12.87s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2019-2020/stats/2019-2020-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2020-2021...


 48%|████▊     | 74/154 [16:00<17:23, 13.04s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2020-2021/stats/2020-2021-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2021-2022...


 49%|████▊     | 75/154 [16:08<14:51, 11.29s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2021-2022/stats/2021-2022-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2022-2023...


 49%|████▉     | 76/154 [16:18<14:32, 11.19s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2022-2023/stats/2022-2023-Major-League-Soccer-Stats

🔍 Scraping Major League Soccer 2023-2024...


 50%|█████     | 77/154 [16:29<14:10, 11.04s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2023-2024/stats/2023-2024-Major-League-Soccer-Stats

🔍 Scraping Brasileirao 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 51%|█████     | 78/154 [16:44<15:21, 12.12s/it]


🔍 Scraping Brasileirao 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 51%|█████▏    | 79/154 [16:58<15:51, 12.69s/it]


🔍 Scraping Brasileirao 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 52%|█████▏    | 80/154 [17:12<16:19, 13.23s/it]


🔍 Scraping Brasileirao 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 53%|█████▎    | 81/154 [17:26<16:14, 13.35s/it]


🔍 Scraping Brasileirao 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 53%|█████▎    | 82/154 [17:39<16:02, 13.37s/it]


🔍 Scraping Brasileirao 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 54%|█████▍    | 83/154 [17:53<16:03, 13.56s/it]


🔍 Scraping Brasileirao 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 55%|█████▍    | 84/154 [18:08<16:12, 13.89s/it]


🔍 Scraping Brasileirao 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 55%|█████▌    | 85/154 [18:22<16:09, 14.06s/it]


🔍 Scraping Brasileirao 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 56%|█████▌    | 86/154 [18:35<15:24, 13.60s/it]


🔍 Scraping Brasileirao 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 56%|█████▋    | 87/154 [18:48<14:52, 13.32s/it]


🔍 Scraping Brasileirao 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 531 players scraped.


 57%|█████▋    | 88/154 [19:02<14:58, 13.61s/it]


🔍 Scraping Liga MX 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 567 players scraped.


 58%|█████▊    | 89/154 [19:16<14:45, 13.62s/it]


🔍 Scraping Liga MX 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 547 players scraped.


 58%|█████▊    | 90/154 [19:27<13:47, 12.92s/it]


🔍 Scraping Liga MX 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 563 players scraped.


 59%|█████▉    | 91/154 [19:40<13:37, 12.97s/it]


🔍 Scraping Liga MX 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 561 players scraped.


 60%|█████▉    | 92/154 [19:53<13:25, 12.99s/it]


🔍 Scraping Liga MX 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 546 players scraped.


 60%|██████    | 93/154 [20:07<13:21, 13.14s/it]


🔍 Scraping Liga MX 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 573 players scraped.


 61%|██████    | 94/154 [20:19<12:56, 12.94s/it]


🔍 Scraping Liga MX 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 572 players scraped.


 62%|██████▏   | 95/154 [20:32<12:51, 13.08s/it]


🔍 Scraping Liga MX 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 588 players scraped.


 62%|██████▏   | 96/154 [20:44<12:21, 12.78s/it]


🔍 Scraping Liga MX 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 601 players scraped.


 63%|██████▎   | 97/154 [20:56<11:41, 12.32s/it]


🔍 Scraping Liga MX 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 586 players scraped.


 64%|██████▎   | 98/154 [21:09<11:51, 12.70s/it]


🔍 Scraping Liga MX 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 622 players scraped.


 64%|██████▍   | 99/154 [21:21<11:22, 12.41s/it]


🔍 Scraping Eredivisie 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 464 players scraped.


 65%|██████▍   | 100/154 [21:34<11:21, 12.61s/it]


🔍 Scraping Eredivisie 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 480 players scraped.


 66%|██████▌   | 101/154 [21:46<10:53, 12.32s/it]


🔍 Scraping Eredivisie 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 473 players scraped.


 66%|██████▌   | 102/154 [21:58<10:34, 12.21s/it]


🔍 Scraping Eredivisie 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 484 players scraped.


 67%|██████▋   | 103/154 [22:10<10:20, 12.17s/it]


🔍 Scraping Eredivisie 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 477 players scraped.


 68%|██████▊   | 104/154 [22:21<10:01, 12.03s/it]


🔍 Scraping Eredivisie 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 479 players scraped.


 68%|██████▊   | 105/154 [22:33<09:40, 11.84s/it]


🔍 Scraping Eredivisie 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 484 players scraped.


 69%|██████▉   | 106/154 [22:44<09:22, 11.72s/it]


🔍 Scraping Eredivisie 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 522 players scraped.


 69%|██████▉   | 107/154 [22:57<09:20, 11.92s/it]


🔍 Scraping Eredivisie 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 533 players scraped.


 70%|███████   | 108/154 [23:10<09:21, 12.21s/it]


🔍 Scraping Eredivisie 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 526 players scraped.


 71%|███████   | 109/154 [23:22<09:13, 12.29s/it]


🔍 Scraping Eredivisie 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 524 players scraped.


 71%|███████▏  | 110/154 [23:34<08:50, 12.06s/it]


🔍 Scraping Primeira Liga 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 470 players scraped.


 72%|███████▏  | 111/154 [23:46<08:44, 12.21s/it]


🔍 Scraping Primeira Liga 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 528 players scraped.


 73%|███████▎  | 112/154 [23:56<08:04, 11.54s/it]


🔍 Scraping Primeira Liga 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 550 players scraped.


 73%|███████▎  | 113/154 [24:09<08:03, 11.80s/it]


🔍 Scraping Primeira Liga 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 544 players scraped.


 74%|███████▍  | 114/154 [24:22<08:06, 12.16s/it]


🔍 Scraping Primeira Liga 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 519 players scraped.


 75%|███████▍  | 115/154 [24:35<08:03, 12.41s/it]


🔍 Scraping Primeira Liga 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 539 players scraped.


 75%|███████▌  | 116/154 [24:49<08:17, 13.08s/it]


🔍 Scraping Primeira Liga 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 562 players scraped.


 76%|███████▌  | 117/154 [25:03<08:11, 13.27s/it]


🔍 Scraping Primeira Liga 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 543 players scraped.


 77%|███████▋  | 118/154 [25:17<08:08, 13.57s/it]


🔍 Scraping Primeira Liga 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 581 players scraped.


 77%|███████▋  | 119/154 [25:32<08:10, 14.03s/it]


🔍 Scraping Primeira Liga 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 580 players scraped.


 78%|███████▊  | 120/154 [25:46<07:57, 14.05s/it]


🔍 Scraping Primeira Liga 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 536 players scraped.


 79%|███████▊  | 121/154 [26:00<07:38, 13.89s/it]


🔍 Scraping Liga Profesional 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 448 players scraped.


 79%|███████▉  | 122/154 [26:11<06:53, 12.92s/it]


🔍 Scraping Liga Profesional 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 460 players scraped.


 80%|███████▉  | 123/154 [26:22<06:24, 12.41s/it]


🔍 Scraping Liga Profesional 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 418 players scraped.


 81%|████████  | 124/154 [26:34<06:14, 12.49s/it]


🔍 Scraping Liga Profesional 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 378 players scraped.


 81%|████████  | 125/154 [26:46<05:52, 12.17s/it]


🔍 Scraping Liga Profesional 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 412 players scraped.


 82%|████████▏ | 126/154 [26:59<05:51, 12.57s/it]


🔍 Scraping Liga Profesional 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 397 players scraped.


 82%|████████▏ | 127/154 [27:10<05:27, 12.13s/it]


🔍 Scraping Liga Profesional 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 405 players scraped.


 83%|████████▎ | 128/154 [27:22<05:09, 11.90s/it]


🔍 Scraping Liga Profesional 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 468 players scraped.


 84%|████████▍ | 129/154 [27:32<04:45, 11.41s/it]


🔍 Scraping Liga Profesional 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 416 players scraped.


 84%|████████▍ | 130/154 [27:43<04:32, 11.34s/it]


🔍 Scraping Liga Profesional 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 521 players scraped.


 85%|████████▌ | 131/154 [27:54<04:17, 11.20s/it]


🔍 Scraping Liga Profesional 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 518 players scraped.


 86%|████████▌ | 132/154 [28:08<04:21, 11.88s/it]


🔍 Scraping Belgian Pro League 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 454 players scraped.


 86%|████████▋ | 133/154 [28:20<04:15, 12.15s/it]


🔍 Scraping Belgian Pro League 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 465 players scraped.


 87%|████████▋ | 134/154 [28:33<04:08, 12.40s/it]


🔍 Scraping Belgian Pro League 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 477 players scraped.


 88%|████████▊ | 135/154 [28:45<03:49, 12.10s/it]


🔍 Scraping Belgian Pro League 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 488 players scraped.


 88%|████████▊ | 136/154 [28:59<03:47, 12.63s/it]


🔍 Scraping Belgian Pro League 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 490 players scraped.


 89%|████████▉ | 137/154 [29:11<03:33, 12.56s/it]


🔍 Scraping Belgian Pro League 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 455 players scraped.


 90%|████████▉ | 138/154 [29:23<03:18, 12.41s/it]


🔍 Scraping Belgian Pro League 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 463 players scraped.


 90%|█████████ | 139/154 [29:35<03:01, 12.12s/it]


🔍 Scraping Belgian Pro League 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 545 players scraped.


 91%|█████████ | 140/154 [29:50<03:02, 13.04s/it]


🔍 Scraping Belgian Pro League 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 538 players scraped.


 92%|█████████▏| 141/154 [30:04<02:54, 13.40s/it]


🔍 Scraping Belgian Pro League 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 540 players scraped.


 92%|█████████▏| 142/154 [30:18<02:41, 13.48s/it]


🔍 Scraping Belgian Pro League 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 479 players scraped.


 93%|█████████▎| 143/154 [30:31<02:28, 13.47s/it]


🔍 Scraping Serie B 2013-2014...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 323 players scraped.


 94%|█████████▎| 144/154 [30:43<02:08, 12.86s/it]


🔍 Scraping Serie B 2014-2015...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 347 players scraped.


 94%|█████████▍| 145/154 [30:54<01:51, 12.34s/it]


🔍 Scraping Serie B 2015-2016...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 365 players scraped.


 95%|█████████▍| 146/154 [31:05<01:36, 12.05s/it]


🔍 Scraping Serie B 2016-2017...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 350 players scraped.


 95%|█████████▌| 147/154 [31:16<01:21, 11.71s/it]


🔍 Scraping Serie B 2017-2018...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 365 players scraped.


 96%|█████████▌| 148/154 [31:28<01:10, 11.74s/it]


🔍 Scraping Serie B 2018-2019...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 400 players scraped.


 97%|█████████▋| 149/154 [31:39<00:58, 11.71s/it]


🔍 Scraping Serie B 2019-2020...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 342 players scraped.


 97%|█████████▋| 150/154 [31:50<00:45, 11.47s/it]


🔍 Scraping Serie B 2020-2021...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 374 players scraped.


 98%|█████████▊| 151/154 [32:04<00:36, 12.07s/it]


🔍 Scraping Serie B 2021-2022...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 388 players scraped.


 99%|█████████▊| 152/154 [32:16<00:23, 12.00s/it]


🔍 Scraping Serie B 2022-2023...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 369 players scraped.


 99%|█████████▉| 153/154 [32:26<00:11, 11.61s/it]


🔍 Scraping Serie B 2023-2024...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 360 players scraped.


100%|██████████| 154/154 [32:36<00:00, 12.71s/it]



🎉 Done! Collected 76502 player-season rows across 143 tables.


In [67]:
combined_df[['Player','League','Season']]

Unnamed: 0,Player,League,Season
0,Charlie Adam,Premier League,2013-2014
1,Emmanuel Adebayor,Premier League,2013-2014
2,Adrián,Premier League,2013-2014
3,Gabriel Agbonlahor,Premier League,2013-2014
4,Daniel Agger,Premier League,2013-2014
...,...,...,...
76497,Scott Wright,Serie B,2023-2024
76498,Tete Yengi,Serie B,2023-2024
76499,Rıdvan Yılmaz,Serie B,2023-2024
76500,Elie Youan,Serie B,2023-2024


In [69]:
combined_df.loc[combined_df['Player'] == 'Manor Solomon'][['Player','League','Season']]

Unnamed: 0,Player,League,Season
5336,Manor Solomon,Premier League,2022-2023
5917,Manor Solomon,Premier League,2023-2024
64727,Manor Solomon,Liga Profesional,2018-2019
65120,Manor Solomon,Liga Profesional,2019-2020
65579,Manor Solomon,Liga Profesional,2020-2021
66009,Manor Solomon,Liga Profesional,2021-2022


In [83]:
last_season_list = []
for league, season, url in tqdm(last_season):
    try:
        print(f"\n🔍 Scraping {league} '2024-2025'...")

        driver.get(url)
        time.sleep(random.uniform(5, 7))  # Wait for JS to load

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find('table', {'id': 'stats_standard'})
        if table is None:
            print(f"⚠️ No player stats table found at {url}")
            continue

        df = pd.read_html(str(table), header=[0, 1])[0]
        df.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in df.columns]
        df.columns = [ col.replace("Unnamed: ", "").split(" ", 1)[-1] if col.startswith("Unnamed: ") else col for col in df.columns ]
        df = df[df['Player'].values != 'Player']  # Remove internal header rows
        df['League'] = league
        df['Season'] = season
        last_season_list.append(df)

        print(f"✅ {len(df)} players scraped.")
        time.sleep(random.uniform(1, 3))

    except Exception as e:
        print(f"❌ Error scraping {league} {season}: {e}")
        time.sleep(random.uniform(3, 6))

# --- CLEANUP ---
driver.quit()

# --- RESULT ---
if last_season_list:
    last_season_df = pd.concat(last_season_list, ignore_index=True)
    print(f"\n🎉 Done! Collected {combined_df.shape[0]} player-season rows across {len(all_data)} tables.")
else:
    print("🚫 No data was scraped.")

  0%|          | 0/14 [00:00<?, ?it/s]


🔍 Scraping Premier League '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 574 players scraped.


  7%|▋         | 1/14 [00:14<03:06, 14.36s/it]


🔍 Scraping La Liga '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 601 players scraped.


 14%|█▍        | 2/14 [00:28<02:50, 14.20s/it]


🔍 Scraping Serie A '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 634 players scraped.


 21%|██▏       | 3/14 [00:44<02:45, 15.05s/it]


🔍 Scraping Bundesliga '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 492 players scraped.


 29%|██▊       | 4/14 [00:57<02:23, 14.37s/it]


🔍 Scraping Ligue 1 '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 553 players scraped.


 36%|███▌      | 5/14 [01:13<02:12, 14.68s/it]


🔍 Scraping Championship '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 765 players scraped.


 43%|████▎     | 6/14 [01:30<02:04, 15.62s/it]


🔍 Scraping Major League Soccer '2024-2025'...


 50%|█████     | 7/14 [01:41<01:39, 14.20s/it]

⚠️ No player stats table found at https://fbref.com/en/comps/19/2024-2025/stats/2024-2025-Major-League-Soccer-Stats

🔍 Scraping Brasileirao '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 532 players scraped.


 57%|█████▋    | 8/14 [01:56<01:26, 14.42s/it]


🔍 Scraping Liga MX '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 636 players scraped.


 64%|██████▍   | 9/14 [02:10<01:10, 14.10s/it]


🔍 Scraping Eredivisie '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 534 players scraped.


 71%|███████▏  | 10/14 [02:22<00:54, 13.65s/it]


🔍 Scraping Primeira Liga '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 585 players scraped.


 79%|███████▊  | 11/14 [02:36<00:41, 13.83s/it]


🔍 Scraping Liga Profesional '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 521 players scraped.


 86%|████████▌ | 12/14 [02:49<00:26, 13.46s/it]


🔍 Scraping Belgian Pro League '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 486 players scraped.


 93%|█████████▎| 13/14 [03:04<00:13, 13.77s/it]


🔍 Scraping Serie B '2024-2025'...


  df = pd.read_html(str(table), header=[0, 1])[0]


✅ 372 players scraped.


100%|██████████| 14/14 [03:17<00:00, 14.09s/it]



🎉 Done! Collected 76502 player-season rows across 143 tables.


In [89]:
last_season_df.loc[last_season_df['League'] == 'Belgian Pro League']['Squad'].unique()

array(['Gent', 'Cercle Brugge', 'Antwerp', 'Dender', 'Genk', 'Anderlecht',
       'OH Leuven', 'Westerlo', 'Kortrijk', 'Standard Liège',
       'Sint-Truiden', 'Charleroi', 'Mechelen', 'Union SG', 'Beerschot',
       'Club Brugge'], dtype=object)

In [3]:
combined_df.loc[combined_df['Player'] == 'Anan Khalaili']

NameError: name 'combined_df' is not defined

In [2]:
last_season_df.loc[last_season_df['Squad'] == 'Union SG'].sort_values(by='Per 90 Minutes xG+xAG',ascending=False)

NameError: name 'last_season_df' is not defined

In [1]:
last_season_df.to_csv('last_season.csv')

NameError: name 'last_season_df' is not defined