In [57]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Setup: start Chrome and open the 2021-2022 Premier League salary table.
# No headless flag is set, so Chrome opens a visible window (its default).
options = Options()
driver = webdriver.Chrome(options=options)

# One [player, weekly salary, annual salary, position] record per table row.
all_data = []

# Everything that talks to the browser runs inside try/finally so the Chrome
# process is shut down even when a wait times out or a lookup fails.
try:
    driver.get("https://www.capology.com/uk/premier-league/salaries/2021-2022/")
    wait = WebDriverWait(driver, 10)
    time.sleep(5)  # fixed pause before the first DOM query

    # Optional: Hide sticky navbar
    driver.execute_script("""
    const sticky = document.querySelector('.nav-sticky');
    if (sticky) sticky.style.display = 'none';
""")

    # Find how many pages exist: the largest numeric label among the pagination links.
    pagination_links = driver.find_elements(By.CSS_SELECTOR, "ul.pagination li a.page-link")
    page_numbers = [int(link.text) for link in pagination_links if link.text.isdigit()]
    # With no numbered links at all, fall back to scraping the current page only.
    total_pages = max(page_numbers, default=1)
    print(f"📄 Total pages found: {total_pages}")

    # Loop through each page
    for page_num in range(1, total_pages + 1):
        print(f"🔍 Scraping page {page_num}...")

        # Wait for table to load
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table tbody tr")))
        rows = driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

        for row in rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            # Rows with fewer than six cells are skipped.
            if len(cells) >= 6:
                # Cell indices 0, 1, 3 and 4 feed the Player, Weekly Salary,
                # Annual Salary and Position columns of the saved frame.
                # NOTE(review): mapping inferred from the displayed output —
                # confirm against the live table layout.
                player = cells[0].text.strip()
                weekly_salary = cells[1].text.strip()
                annual_salary = cells[3].text.strip()
                position = cells[4].text.strip()
                all_data.append([player, weekly_salary, annual_salary, position])

        # Don't click after last page
        if page_num < total_pages:
            try:
                next_page_btn = wait.until(EC.presence_of_element_located(
                    (By.XPATH, f"//ul[contains(@class,'pagination')]//a[text()='{page_num + 1}']")
                ))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_btn)
                time.sleep(1)
                # Click through JavaScript rather than WebElement.click().
                driver.execute_script("arguments[0].click();", next_page_btn)
                time.sleep(4)  # fixed pause before reading the next page's rows
            except Exception as e:
                # Best effort: stop paginating but keep the rows collected so far.
                print(f"⚠️ Could not click page {page_num + 1}: {e}")
                break
finally:
    driver.quit()

# Save CSV
df = pd.DataFrame(all_data, columns=["Player", "Weekly Salary", "Annual Salary", "Position"])
df.to_csv("premier_league_salaries_2021_2022.csv", index=False)
print("✅ All done! Data from all pages saved.")

📄 Total pages found: 23
🔍 Scraping page 1...
🔍 Scraping page 2...
🔍 Scraping page 3...
🔍 Scraping page 4...
🔍 Scraping page 5...
🔍 Scraping page 6...
🔍 Scraping page 7...
🔍 Scraping page 8...
🔍 Scraping page 9...
🔍 Scraping page 10...
🔍 Scraping page 11...
🔍 Scraping page 12...
🔍 Scraping page 13...
🔍 Scraping page 14...
🔍 Scraping page 15...
🔍 Scraping page 16...
🔍 Scraping page 17...
🔍 Scraping page 18...
🔍 Scraping page 19...
🔍 Scraping page 20...
🔍 Scraping page 21...
🔍 Scraping page 22...
🔍 Scraping page 23...
✅ All done! Data from all pages saved.


In [59]:
# Render the scraped salary frame using the notebook's rich display.
display(df)

Unnamed: 0,Player,Weekly Salary,Annual Salary,Position
0,Cristiano Ronaldo,"£ 515,000","£ 28,599,424",F
1,Kevin De Bruyne,"£ 400,000","£ 22,213,145",F
2,David de Gea,"£ 375,000","£ 20,824,824",K
3,Raphaël Varane,"£ 340,000","£ 18,881,173",D
4,Romelu Lukaku,"£ 325,000","£ 18,048,180",F
...,...,...,...,...
557,,,,
558,,,,
559,,,,
560,,,,


In [88]:
# Show the first 476 rows of df.
# NOTE(review): 476 looks hand-picked to find where the populated rows end —
# confirm, or replace it with a named constant.
display(df.head(476))

Unnamed: 0,Player,Weekly Salary,Annual Salary,Position
0,Cristiano Ronaldo,"£ 515,000","£ 28,599,424",F
1,Kevin De Bruyne,"£ 400,000","£ 22,213,145",F
2,David de Gea,"£ 375,000","£ 20,824,824",K
3,Raphaël Varane,"£ 340,000","£ 18,881,173",D
4,Romelu Lukaku,"£ 325,000","£ 18,048,180",F
...,...,...,...,...
471,Zanka,"£ 15,000","£ 832,993",D
472,Ashley Fletcher,"£ 15,000","£ 832,993",F
473,Emmanuel Dennis,"£ 15,000","£ 832,993",F
474,Ken Sema,"£ 15,000","£ 832,993",M


In [110]:
# Drop the rows that carry no data at all and renumber the index from 0.
# Written as a single chain that rebinds `df`, so no frame is mutated in place.
df = (
    df
    .replace('', pd.NA)        # Convert empty strings to missing values
    .dropna(how='all')         # remove rows where every column is missing
    .reset_index(drop=True)    # discard the old index instead of keeping it as a column
)


In [114]:
# Bare last-line expression: the notebook displays the current contents of df.
df

Unnamed: 0,Player,Weekly Salary,Annual Salary,Position
0,Cristiano Ronaldo,"£ 515,000","£ 28,599,424",F
1,Kevin De Bruyne,"£ 400,000","£ 22,213,145",F
2,David de Gea,"£ 375,000","£ 20,824,824",K
3,Raphaël Varane,"£ 340,000","£ 18,881,173",D
4,Romelu Lukaku,"£ 325,000","£ 18,048,180",F
...,...,...,...,...
470,Dan Burn,"£ 15,000","£ 832,993",D
471,Zanka,"£ 15,000","£ 832,993",D
472,Ashley Fletcher,"£ 15,000","£ 832,993",F
473,Emmanuel Dennis,"£ 15,000","£ 832,993",F


In [None]:
df.to_csv('