Load all player links first: The script first collects all player links before navigating to any of them. This avoids the StaleElementReferenceException caused by navigating away from the page while iterating through elements.

Explicit waits: The script uses explicit waits to ensure that elements are present before it interacts with them.
Handling StaleElementReferenceException: The script catches StaleElementReferenceException so that it keeps running even if some elements are no longer attached to the DOM.
Additional Tips:
Ensure your ChromeDriver and Chrome browser are up-to-date.
You can add more robust error handling and logging to catch and debug any other issues that arise.



In [1]:
import ctypes
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Prevent sleep mode while the (long-running) scrape is in progress.
# SetThreadExecutionState is a Windows-only API and ctypes only exposes
# `windll` on Windows, so guard on it instead of crashing with
# AttributeError on macOS/Linux.
ES_CONTINUOUS = 0x80000000        # keep the requested state in effect until it is changed
ES_DISPLAY_REQUIRED = 0x00000002  # keep the display on
if hasattr(ctypes, "windll"):
    ctypes.windll.kernel32.SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED)

# Start Chrome with default options and open the player index page.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
driver.get('https://www.premierleague.com/players')

# Scroll to the bottom repeatedly until the document height stops changing
# (presumably the page appends more players as it is scrolled -- confirm
# against the live site).
SCROLL_PAUSE_TIME = 2
HEIGHT_SCRIPT = "return document.body.scrollHeight"

last_height = driver.execute_script(HEIGHT_SCRIPT)
page_still_growing = True
while page_still_growing:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Give the page time to render before measuring again.
    time.sleep(SCROLL_PAUSE_TIME)
    new_height = driver.execute_script(HEIGHT_SCRIPT)
    page_still_growing = new_height != last_height
    last_height = new_height

# Directory the downloaded player images are written to.
output_dir = 'player_images'
# exist_ok=True creates the directory only when it is missing, without the
# race between a separate "exists?" check and the creation call.
os.makedirs(output_dir, exist_ok=True)

# Block (up to 10 s) until at least one player-name element is in the DOM;
# without it there is nothing to scrape, so shut the browser down and stop.
try:
    # Updated class name based on inspection
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'player__name'))
    )
except TimeoutException:
    print("Timeout occurred while waiting for player name class to be present")
    driver.quit()
    # `exit()` is a helper injected by the `site` module and is not always
    # available; raising SystemExit works everywhere and the non-zero status
    # signals that the run failed.
    raise SystemExit(1)

# Collect every player URL up front, before navigating anywhere: once the
# driver leaves this page the located elements become stale.
player_links = driver.find_elements(By.CLASS_NAME, 'player__name')

# Base used to resolve protocol-relative ("//host/...") or relative hrefs.
base_url = driver.current_url

links = []
for link in player_links:
    try:
        href = link.get_attribute('href')
    except StaleElementReferenceException as e:
        print(f"Stale element reference: {e}")
        continue
    if not href:
        # get_attribute returns None when the element has no href; skip it
        # instead of failing on None.startswith(...).
        print("Skipping player element without an href attribute")
        continue
    # urljoin leaves absolute URLs untouched and completes the others.
    player_url = urljoin(base_url, href)
    print(f"Collected player URL: {player_url}")  # Debugging: Print collected URL
    links.append(player_url)

print(f"Total player links collected: {len(links)}")  # Debugging: Print the number of collected links

# Visit each collected player page, locate the player image and save it to
# `output_dir`. Failures for a single player are reported and skipped; the
# browser is always closed, even if an unexpected error aborts the loop.

# Characters that must not appear in a file name (path separators and the
# characters Windows rejects).
INVALID_FILENAME_CHARS = '\\/:*?"<>|'

try:
    for index, player_url in enumerate(links, start=1):
        print(f"Navigating to player URL: {player_url}")  # Debugging: Print URL being navigated to
        driver.get(player_url)

        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'img')))
        except TimeoutException:
            print("Timed out waiting for image element to be present")
            continue

        # Extra grace period after the element appears, kept from the
        # original script (presumably so the image source is populated).
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        img_tag = soup.find('img', class_='img')
        if not img_tag or not img_tag.get('src'):
            print(f"No usable image element found at {player_url}")
            continue

        # The src in the raw page source may be protocol-relative or relative;
        # resolve it against the page URL so requests gets a full URL.
        img_url = urljoin(player_url, img_tag['src'])

        # Use the alt text as the file name, falling back to a numbered name
        # when it is missing or empty, and strip characters that are unsafe
        # in file names.
        alt_text = (img_tag.get('alt') or '').strip()
        player_name = (alt_text or f"player_{index}").replace(' ', '_')
        player_name = ''.join('_' if ch in INVALID_FILENAME_CHARS else ch for ch in player_name)
        print(f"Found image for {player_name}: {img_url}")  # Debugging: Print image URL and player name

        try:
            response = requests.get(img_url, timeout=10)
            # Do not save an HTTP error page as if it were an image.
            response.raise_for_status()
            with open(os.path.join(output_dir, f"{player_name}.jpg"), 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(f"Error downloading image for {player_name}: {e}")
        else:
            print(f"Saved image for {player_name}")  # Debugging: Confirm image save
finally:
    driver.quit()


Collected player URL: https://www.premierleague.com/players/147241/Player-2/overview
Collected player URL: https://www.premierleague.com/players/19970/Max-Aarons/overview
Collected player URL: https://www.premierleague.com/players/117754/Zach-Abbott/overview
Collected player URL: https://www.premierleague.com/players/125075/Josh-Acheampong/overview
Collected player URL: https://www.premierleague.com/players/9199/Adam-Armstrong/overview
Collected player URL: https://www.premierleague.com/players/42894/Tyler-Adams/overview
Collected player URL: https://www.premierleague.com/players/13549/Tosin-Adarabioyo/overview
Collected player URL: https://www.premierleague.com/players/72541/Tayo-Adaramola/overview
Collected player URL: https://www.premierleague.com/players/108693/Valintino-Adedokun/overview
Collected player URL: https://www.premierleague.com/players/126317/Simon-Adingra/overview
Collected player URL: https://www.premierleague.com/players/50460/Jaidon-Anthony/overview
Collected player