In [1]:
import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from io import BytesIO
from PIL import Image
# from webdriver_manager.chrome import ChromeDriverManager

# ====== CONFIG ======
url = "https://www.espncricinfo.com/photo"  # <-- Change to your target website
download_folder = "downloaded_images"
scroll_pause_time = 2  # seconds between scrolls
max_scrolls = 10       # how many times to scroll
target_width = 800
target_height = 600
# =====================

# Setup Chrome
driver = webdriver.Chrome()

# Open the website
driver.get(url)

# Make sure folder exists
os.makedirs(download_folder, exist_ok=True)

# Scrolling and collecting image URLs
image_urls = set()
last_height = driver.execute_script("return document.body.scrollHeight")

for i in range(max_scrolls):
    print(f"Scrolling {i+1}/{max_scrolls}...")
    
    # Scroll to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    
    # Wait for new content to load
    time.sleep(scroll_pause_time)
    
    # Collect image elements
    imgs = driver.find_elements(By.TAG_NAME, "img")
    for img in imgs:
        src = img.get_attribute("src")
        if src and src.startswith("http"):
            image_urls.add(src)
    
    # Check if page height stopped changing (end of page)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print(f"Found {len(image_urls)} images. Starting download...")
count = 0
# Download images
for i, url in enumerate(image_urls):
    # try:
    #     img_data = requests.get(url, timeout=10).content
    #     img_data
    #     with open(os.path.join(download_folder, f"image_{i+1}.jpg"), "wb") as f:
    #         f.write(img_data)
    #     print(f"Downloaded {i+1}/{len(image_urls)}")
    # except Exception as e:
    #     print(f"Failed to download {url}: {e}")
    try:
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
        # Resize to 800x600
        resized = img.resize((target_width, target_height), Image.LANCZOS)
        if resized.size == (target_width, target_height):
            count += 1
            save_path = os.path.join(download_folder, f"image_{count}.jpg")
            resized.save(save_path)
            print(f"Saved image {count} ({resized.size}) from {url}")
        else:
            # Uncomment next line to print all sizes found
            print(f"Skipped ({resized.size}) {url}")
            pass

    except Exception as e:
        print(f"Error downloading {url}: {e}")

driver.quit()
print("✅ Done.")


Scrolling 1/10...
Scrolling 2/10...
Scrolling 3/10...
Scrolling 4/10...
Scrolling 5/10...
Scrolling 6/10...
Scrolling 7/10...
Scrolling 8/10...
Scrolling 9/10...
Scrolling 10/10...
Found 73 images. Starting download...
Saved image 1 ((800, 600)) from https://img1.hscicdn.com/image/upload/f_auto,t_ds_wide_w_1200,q_60/lsci/db/PICTURES/CMS/409100/409143.jpg
Saved image 2 ((800, 600)) from https://img1.hscicdn.com/image/upload/f_auto,t_ds_w_960,q_50/lsci/db/PICTURES/CMS/409100/409103.jpg
Saved image 3 ((800, 600)) from https://img1.hscicdn.com/image/upload/f_auto,t_ds_w_960,q_50/lsci/db/PICTURES/CMS/409100/409112.jpg
Saved image 4 ((800, 600)) from https://img1.hscicdn.com/image/upload/f_auto,t_ds_w_960,q_50/lsci/db/PICTURES/CMS/409000/409074.jpg
Error downloading https://wassets.hscicdn.com/static/images/lazyimage-transparent.png: cannot write mode RGBA as JPEG
Saved image 6 ((800, 600)) from https://img1.hscicdn.com/image/upload/f_auto,t_ds_w_960,q_50/lsci/db/PICTURES/CMS/409100/409124.j