In [9]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
import requests
import time
import os

# Make sure the download folder is present before anything is written to it;
# exist_ok=True makes re-running this cell a no-op.
os.makedirs(name='europeana_images', exist_ok=True)

In [58]:
# Location of the manually downloaded GeckoDriver binary.
gecko_driver_path = 'geckodriver.exe'  # Update this path

# Setup WebDriver for Firefox using manually downloaded GeckoDriver.
# The notebook's import cell only brings in `webdriver` and
# `selenium.webdriver.firefox.service.Service`; the previously used names
# `Options` and `FirefoxService` were never imported and raised NameError on a
# fresh kernel. `webdriver.FirefoxOptions` is the same options class exposed
# through the already-imported `webdriver` namespace.
firefox_options = webdriver.FirefoxOptions()

service = Service(executable_path=gecko_driver_path)
driver = webdriver.Firefox(service=service, options=firefox_options)


In [38]:
# Search-results URL template for the "landscapes" query. `{page_num}` is
# filled in with str.format() by the scraping loop; the remaining query
# parameters are kept exactly as copied from the site.
base_url = (
    "https://www.europeana.eu/en/search"
    "?page={page_num}"
    "&qf=collection%3Aphotography"
    "&qf=TYPE%3A%22IMAGE%22"
    "&query=landscapes"
    "&reusability=open"
    "&view=grid"
)

In [39]:
# Folder that receives every downloaded image. It is created if missing and
# left untouched when it already exists.
output_dir = "europeana_images"
os.makedirs(name=output_dir, exist_ok=True)

# Function to download images
def download_image(img_url, page, index):
    """Download one image and store it as ``page_<page>_image_<index>.jpg``.

    The file is written into the module-level ``output_dir``. Failures are
    reported with ``print`` instead of being raised, because this function is
    run on worker threads whose results are never inspected.

    Args:
        img_url: Absolute URL of the image. Missing values (``None``/empty)
            and anything not starting with ``http`` (e.g. ``data:``
            placeholders) are skipped silently.
        page: Result-page number, used only in the file name.
        index: Position of the image on the page, used only in the file name.

    Returns:
        None.
    """
    # <img> tags without a src yield None; guard before calling str methods.
    if not img_url or not img_url.startswith('http'):
        return
    try:
        response = requests.get(img_url, timeout=10)
        # Without this check a 4xx/5xx error body would be saved as a ".jpg".
        # HTTPError is a RequestException, so it is reported below.
        response.raise_for_status()
        image_name = os.path.join(output_dir, f'page_{page}_image_{index}.jpg')
        with open(image_name, 'wb') as handler:
            handler.write(response.content)
        print(f"Downloaded {img_url} as {image_name}")
    except requests.RequestException as e:
        print(f"Failed to download {img_url}: {e}")
    except OSError as e:
        # Disk/permission problems would otherwise vanish inside the executor.
        print(f"Failed to save {img_url}: {e}")

In [40]:
def download_imagetext(img_url, page, index, Type):
    """Download one image and store it as ``page_<page>_image_<index>_<Type>.jpg``.

    The file is written into the module-level ``output_dir``. Failures are
    reported with ``print`` instead of being raised, because this function is
    run on worker threads whose results are never inspected.

    Args:
        img_url: Absolute URL of the image. Missing values (``None``/empty)
            and anything not starting with ``http`` (e.g. ``data:``
            placeholders) are skipped silently.
        page: Result-page number, used only in the file name.
        index: Position of the image on the page, used only in the file name.
        Type: Label appended to the file name (the callers pass the search
            term, e.g. ``"monument"``).

    Returns:
        None.
    """
    # <img> tags without a src yield None; guard before calling str methods.
    if not img_url or not img_url.startswith('http'):
        return
    try:
        response = requests.get(img_url, timeout=10)
        # Without this check a 4xx/5xx error body would be saved as a ".jpg".
        # HTTPError is a RequestException, so it is reported below.
        response.raise_for_status()
        image_name = os.path.join(output_dir, f'page_{page}_image_{index}_{Type}.jpg')
        with open(image_name, 'wb') as handler:
            handler.write(response.content)
        print(f"Downloaded {img_url} as {image_name}")
    except requests.RequestException as e:
        print(f"Failed to download {img_url}: {e}")
    except OSError as e:
        # Disk/permission problems would otherwise vanish inside the executor.
        print(f"Failed to save {img_url}: {e}")

In [41]:
# Scroll to load all content
def scroll_to_load_all(driver, max_scrolls=10, pause=2):
    """Scroll to the bottom of the page until its height stops growing.

    After each scroll the function waits ``pause`` seconds and re-reads
    ``document.body.scrollHeight``; it stops as soon as the height is
    unchanged or after ``max_scrolls`` scrolls, whichever comes first.

    Args:
        driver: Object exposing ``execute_script(js)`` (a Selenium WebDriver).
        max_scrolls: Upper bound on the number of scroll attempts.
        pause: Seconds to wait after each scroll so new content can load.
            Defaults to the previously hard-coded 2 seconds.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_scrolls):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # Give lazily loaded content time to appear
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Nothing new was appended, so the bottom has been reached.
            break
        last_height = new_height

In [42]:
# Scrape the "landscapes" result pages and download every thumbnail found.
#
# NOTE: the shared `driver` is deliberately NOT quit at the end of this cell.
# The later scraping cells call driver.get() on the same object, and doing so
# on a quit session raises InvalidSessionIdException when the notebook is run
# top to bottom.
for page in range(1, 35):  # Adjust the range as needed
    print(f"Scraping page {page}")
    url = base_url.format(page_num=page)
    driver.get(url)

    # Wait (up to 10 s) for at least one result card before parsing
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "card-img"))
        )
    except Exception as e:
        print(f"Error loading page {page}: {e}")
        continue

    # Scroll to load all images
    scroll_to_load_all(driver)

    # Get page source and parse with BeautifulSoup
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find image divs
    image_divs = soup.find_all('div', class_='card-img')

    if not image_divs:
        print(f"No images found on page {page}")
        continue

    # Use ThreadPoolExecutor for concurrent downloads; leaving the `with`
    # block waits for all submitted downloads of this page to finish.
    with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust workers for your system
        for index, div in enumerate(image_divs):
            img_tag = div.find('img')
            if not img_tag:
                continue
            img_url = img_tag.get('src')
            # An <img> without a src attribute yields None; skip those as
            # well as inline data: placeholders.
            if not img_url or img_url.startswith('data:'):
                continue
            executor.submit(download_image, img_url, page, index)

Scraping page 1
Downloaded https://api.europeana.eu/thumbnail/v3/400/28547bbbb3f56d7e57ade027f3466dd0 as europeana_images\page_1_image_1.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a30291392aaacf633bc727a0ab2c8727 as europeana_images\page_1_image_3.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/9d1581d069f2bc998020c06a5b4a3a68 as europeana_images\page_1_image_6.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3f5ea81af210fead8f2abbcb3a215c45 as europeana_images\page_1_image_5.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/787dda8a286321e9d6826c1fe182cf76 as europeana_images\page_1_image_7.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/95b941e6333e75ac16fc3d7824003d9c as europeana_images\page_1_image_11.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ee46761769c8ed7f56b121e93482ecab as europeana_images\page_1_image_9.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/43ce4ad42a902a0935d0f3883e94ec6b as europeana_images\pa

Downloaded https://api.europeana.eu/thumbnail/v3/400/cb5c30cd4c12340c327fedf85ea9b6ec as europeana_images\page_7_image_13.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/44e07d4de0b24df0ac2e27f31083ac85 as europeana_images\page_7_image_11.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e44fe4aa077092afed61c6e976bbd26f as europeana_images\page_7_image_15.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/f13744036a02d42067de9996ed7f9d18 as europeana_images\page_7_image_17.jpg
Scraping page 8
Downloaded https://api.europeana.eu/thumbnail/v3/400/618842397b2fd2b1002f4c21d5b9f1ff as europeana_images\page_8_image_7.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ef6e26e54310b5bf50820b20571b8e17 as europeana_images\page_8_image_3.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/9cc8e58c53721b9d97e880ac084fc10a as europeana_images\page_8_image_5.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/cb6bd145e898d3165593ec1f5103f596 as europeana_images

Downloaded https://api.europeana.eu/thumbnail/v3/400/ef3106b9090a60c80c4e575c75258300 as europeana_images\page_15_image_7.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/1ec885317d32b4551d4a82d405eb767a as europeana_images\page_15_image_9.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e9001f0f56926cff87cded36fe619166 as europeana_images\page_15_image_3.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/83938ca0c1cfe3b053bef945bb038dbe as europeana_images\page_15_image_1.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/9c3f0c1172db0b9d4e7f57b045b72533 as europeana_images\page_15_image_5.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3a834c5cd1288b60230d76ce193ec32c as europeana_images\page_15_image_11.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/8ac78e19a23a55fa1f415c6642928b1d as europeana_images\page_15_image_15.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/4d8bd816e9bee2f1f78769e030e16fa3 as europeana_images\page_15_im

Downloaded https://api.europeana.eu/thumbnail/v3/400/6f3805d9e4f7ae093115ad0e040214b4 as europeana_images\page_22_image_1.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/0821009933d33e4b8e13c34c6ebdd1db as europeana_images\page_22_image_15.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a3404dd7d11dba3905e7c963e882e90d as europeana_images\page_22_image_11.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/455d966e4d6018126aed5da35d75fa03 as europeana_images\page_22_image_17.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/325dc85a0e0b51d86ce7d4a8c58c955e as europeana_images\page_22_image_13.jpg
Scraping page 23
Downloaded https://api.europeana.eu/thumbnail/v3/400/4b3d8d8e27dcda4bfdd2776c0e13f62c as europeana_images\page_23_image_1.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/21e3de63a05df1d164562e7f2dfc31fd as europeana_images\page_23_image_5.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/16a94d0cbbeb2f1e3f83adb3e702ded8 as european

Downloaded https://api.europeana.eu/thumbnail/v3/400/f250b66f4dc47a55784d51d2aa9869ef as europeana_images\page_29_image_11.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/59c5c26eb7ab80024eae8514dddae42e as europeana_images\page_29_image_13.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/0108b688d5b3030e170a0476639dd89c as europeana_images\page_29_image_15.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/16b8c4e6667fd08e5fec80d9b3696bda as europeana_images\page_29_image_17.jpg
Scraping page 30
Downloaded https://api.europeana.eu/thumbnail/v3/400/96160fb76680b776a3d935233e1d740f as europeana_images\page_30_image_5.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ee0e7f8e6768a0c7fbfa119ea3fba9d3 as europeana_images\page_30_image_1.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/54d2aebeca5eeb8c10d89db987c52173 as europeana_images\page_30_image_3.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3de3410d87f719f895c79bf13e39cc97 as european

In [44]:
# Search-results URL template for the "monuments" query. `{page_num}` is
# filled in with str.format() by the scraping loop; the remaining query
# parameters are kept exactly as copied from the site.
base_url_1 = (
    "https://www.europeana.eu/en/search"
    "?page={page_num}"
    "&qf=collection%3Aphotography"
    "&qf=TYPE%3A%22IMAGE%22"
    "&query=monuments"
    "&reusability=open"
    "&view=grid"
)

In [46]:
# Scrape the "monuments" result pages and download every thumbnail found.
#
# NOTE: the shared `driver` is deliberately NOT quit at the end of this cell.
# The later scraping cells call driver.get() on the same object, and doing so
# on a quit session raises InvalidSessionIdException when the notebook is run
# top to bottom.
Type = "monument"  # Label appended to every file name written by this cell

for page in range(1, 35):  # Adjust the range as needed
    print(f"Scraping page {page}")
    url = base_url_1.format(page_num=page)
    driver.get(url)

    # Wait (up to 10 s) for at least one result card before parsing
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "card-img"))
        )
    except Exception as e:
        print(f"Error loading page {page}: {e}")
        continue

    # Scroll to load all images
    scroll_to_load_all(driver)

    # Get page source and parse with BeautifulSoup
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find image divs
    image_divs = soup.find_all('div', class_='card-img')

    if not image_divs:
        print(f"No images found on page {page}")
        continue

    # Use ThreadPoolExecutor for concurrent downloads; leaving the `with`
    # block waits for all submitted downloads of this page to finish.
    with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust workers for your system
        for index, div in enumerate(image_divs):
            img_tag = div.find('img')
            if not img_tag:
                continue
            img_url = img_tag.get('src')
            # An <img> without a src attribute yields None; skip those as
            # well as inline data: placeholders.
            if not img_url or img_url.startswith('data:'):
                continue
            executor.submit(download_imagetext, img_url, page, index, Type)

Scraping page 1
Downloaded https://api.europeana.eu/thumbnail/v3/400/ef4f72d8a9196da24bac24ea36088adb as europeana_images\page_1_image_5_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3b2c85ddcc68ebd84670c1cd1ffa4308 as europeana_images\page_1_image_8_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/39663b9374ed90ac518616c30c1be4b4 as europeana_images\page_1_image_3_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/787dda8a286321e9d6826c1fe182cf76 as europeana_images\page_1_image_6_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/47ec2c02ad23fef2d8dbd1df76a64efd as europeana_images\page_1_image_1_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/0acae16dc2bad9c150b41923321997ce as europeana_images\page_1_image_10_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a10c393492f180a5810870e0d46f574d as europeana_images\page_1_image_12_monument.jpg
Downloaded https://api.europeana.eu/thumbnai

Downloaded https://api.europeana.eu/thumbnail/v3/400/6d0beb2af916713212fbe5065e98f208 as europeana_images\page_7_image_7_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e0021e7177e4c682575cfb173cce36fb as europeana_images\page_7_image_5_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/6d6ba1b1552dcd344eb7cf21831ac696 as europeana_images\page_7_image_1_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/5a114df4494731c4e9bb7168210be7d3 as europeana_images\page_7_image_11_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a0af23ebb78cc0e94925af86a06796a1 as europeana_images\page_7_image_3_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3b0e48da664c79a0e4979cbb30dad915 as europeana_images\page_7_image_15_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/7c3547208cadee639a4462e2bf3de1e1 as europeana_images\page_7_image_17_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/25a93e

Downloaded https://api.europeana.eu/thumbnail/v3/400/1f0853306df2523525e2f26f6837beec as europeana_images\page_13_image_17_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/fe6edcd4219623962e90ba94d2d03bcc as europeana_images\page_13_image_15_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/47af1f4a8c1fe987782a7d83239ee98a as europeana_images\page_13_image_5_monument.jpg
Scraping page 14
Downloaded https://api.europeana.eu/thumbnail/v3/400/52b7e3eaaf464ce39107b4685b1d0ec8 as europeana_images\page_14_image_9_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/9a8ef5db7413d29a1b32a45d399a4a5c as europeana_images\page_14_image_3_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/58f99a0c9a47025b11f388e14f73c153 as europeana_images\page_14_image_5_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e4ca1f12d8534b5a951fceac24343395 as europeana_images\page_14_image_7_monument.jpg
Downloaded https://api.europeana.eu/

Downloaded https://api.europeana.eu/thumbnail/v3/400/5eb435527b1cfd0d051c1afbf7585de2 as europeana_images\page_20_image_11_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/530639c21d214aea68146d425eda19bc as europeana_images\page_20_image_17_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e2d6a79585fdb23e401e01c2444153e4 as europeana_images\page_20_image_15_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/8cdf336a85a4f3dc484838abf0e2ab12 as europeana_images\page_20_image_13_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a7eb7987a364cab02567a4b4b2d62995 as europeana_images\page_20_image_7_monument.jpg
Scraping page 21
Downloaded https://api.europeana.eu/thumbnail/v3/400/91850ff9e168e6830a13a94a4c2bc056 as europeana_images\page_21_image_9_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/16d62dbe859a7b78e8ecb7e25ea6dc1d as europeana_images\page_21_image_1_monument.jpg
Downloaded https://api.europeana.e

Downloaded https://api.europeana.eu/thumbnail/v3/400/cca232c927b5923e1d48a62584d46e5c as europeana_images\page_27_image_3_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/44447bc2b438b746670f7ddf4c2ec085 as europeana_images\page_27_image_5_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/c71fbe29a0a7850e3352424702775f6a as europeana_images\page_27_image_7_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ccdd1fd0d7c10b6a5776181d0e27a1f1 as europeana_images\page_27_image_13_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/7143f15bb11a8cb54af085997cfdbac9 as europeana_images\page_27_image_17_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/75aaadc4da0b64538430613862d04476 as europeana_images\page_27_image_15_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/c455565846083d171fa145175a6690ae as europeana_images\page_27_image_11_monument.jpg
Scraping page 28
Downloaded https://api.europeana.e

Downloaded https://api.europeana.eu/thumbnail/v3/400/533183fafc26e14c03935a504d54732d as europeana_images\page_34_image_5_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ac1a7a69df17980e0f23d5dfd36e9373 as europeana_images\page_34_image_1_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/669bfc177f03384db9899af0f505dbb1 as europeana_images\page_34_image_7_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/7bf7c7d126b8f8dba3361cc3f9f5d49e as europeana_images\page_34_image_9_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/a4056821d6ed03eecd7cc502c607e716 as europeana_images\page_34_image_13_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/45886ddb96df6e92aa013cf6836ca2b5 as europeana_images\page_34_image_17_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/1176be2dfdd998bd57d40fafee6df2f0 as europeana_images\page_34_image_15_monument.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400

In [48]:
# Search-results URL template for the "war" query. `{page_num}` is filled in
# with str.format() by the scraping loop; the remaining query parameters are
# kept exactly as copied from the site.
base_url_2 = (
    "https://www.europeana.eu/en/search"
    "?page={page_num}"
    "&qf=collection%3Aphotography"
    "&qf=TYPE%3A%22IMAGE%22"
    "&qf=contentTier%3A%224%22"
    "&query=war"
    "&reusability=open"
    "&view=grid"
)

In [49]:
# Scrape the "war" result pages and download every thumbnail found.
#
# NOTE: the shared `driver` is deliberately NOT quit at the end of this cell.
# The following scraping cell calls driver.get() on the same object, and doing
# so on a quit session raises InvalidSessionIdException when the notebook is
# run top to bottom.
Type = "war"  # Label appended to every file name written by this cell

for page in range(1, 35):  # Adjust the range as needed
    print(f"Scraping page {page}")
    url = base_url_2.format(page_num=page)
    driver.get(url)

    # Wait (up to 10 s) for at least one result card before parsing
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "card-img"))
        )
    except Exception as e:
        print(f"Error loading page {page}: {e}")
        continue

    # Scroll to load all images
    scroll_to_load_all(driver)

    # Get page source and parse with BeautifulSoup
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find image divs
    image_divs = soup.find_all('div', class_='card-img')

    if not image_divs:
        print(f"No images found on page {page}")
        continue

    # Use ThreadPoolExecutor for concurrent downloads; leaving the `with`
    # block waits for all submitted downloads of this page to finish.
    with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust workers for your system
        for index, div in enumerate(image_divs):
            img_tag = div.find('img')
            if not img_tag:
                continue
            img_url = img_tag.get('src')
            # An <img> without a src attribute yields None; skip those as
            # well as inline data: placeholders.
            if not img_url or img_url.startswith('data:'):
                continue
            executor.submit(download_imagetext, img_url, page, index, Type)

Scraping page 1
Downloaded https://api.europeana.eu/thumbnail/v3/400/49a95af0f98a603fc401541dacfa45fe as europeana_images\page_1_image_6_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ea948bcf09f2ec2b02d93d070aafd2fc as europeana_images\page_1_image_5_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/8c7b12d77f47864d489a3addfdc7c34e as europeana_images\page_1_image_3_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/592ae4cff6f340f7be6b46b7fa1e571e as europeana_images\page_1_image_1_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/0b92bf59ec88318ac7c9c76124380dc2 as europeana_images\page_1_image_8_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/38961dd46426179043e0313b26d2bf8f as europeana_images\page_1_image_12_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/fae1a0c29cc7568459329b409b6fe729 as europeana_images\page_1_image_10_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/f88d7cfb2fbf493a57b2b808e7

Downloaded https://api.europeana.eu/thumbnail/v3/400/53a99648dcfc7c8fe38361ccdee30657 as europeana_images\page_6_image_13_war.jpg
Failed to download https://api.europeana.eu/thumbnail/v3/400/c185e3b529acc30ebcb60e08e07b67f8: HTTPSConnectionPool(host='api.europeana.eu', port=443): Read timed out.
Scraping page 7
Downloaded https://api.europeana.eu/thumbnail/v3/400/484ea94fa88c5a60d7d76996f696bd20 as europeana_images\page_7_image_1_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/7c5d0bd39add9b46d80cb3bae02be8c0 as europeana_images\page_7_image_7_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/53e57f2600572c78e670b5bb70ea1e8d as europeana_images\page_7_image_3_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/efb3c3d12d8a8c34d4adc34c873b87bf as europeana_images\page_7_image_9_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/be7ab8a4dc62da36c5552f272d8f0630 as europeana_images\page_7_image_11_war.jpg
Downloaded https://api.europeana.eu/thumb

Downloaded https://api.europeana.eu/thumbnail/v3/400/597621bbb2a8669ce12cc155860a9a19 as europeana_images\page_18_image_1_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/1e01aea2668cc3c3584603f2de397d85 as europeana_images\page_18_image_11_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/b94e848c05c7b99a88b9b08736dc8f21 as europeana_images\page_18_image_13_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/6c45a8267749ae49be8c3ae9b88d7969 as europeana_images\page_18_image_17_war.jpg
Scraping page 19
Downloaded https://api.europeana.eu/thumbnail/v3/400/0237804061d301ad491206deafd56ee9 as europeana_images\page_19_image_5_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/4b8a6e2cac8062713c823ef515e1d7af as europeana_images\page_19_image_3_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/c28a84fa527fb3a7ad6112abc03774e9 as europeana_images\page_19_image_1_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/d24155fb615066a69

Downloaded https://api.europeana.eu/thumbnail/v3/400/ffb1707f6a1031dc9b07e6adc887cc2a as europeana_images\page_25_image_13_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/35bf50d262c18df5ed7c6a77a3eac737 as europeana_images\page_25_image_11_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/3f28b12e56e2f16cd758d00c8af81646 as europeana_images\page_25_image_17_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ea7ab1dd72acc6b787acff75099d9e3e as europeana_images\page_25_image_15_war.jpg
Scraping page 26
Scraping page 27
Downloaded https://api.europeana.eu/thumbnail/v3/400/64f1bb693520a88ed37f90afc813416c as europeana_images\page_27_image_7_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/c5461693fe530de98b0907b0808a35ca as europeana_images\page_27_image_1_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/6583242951f9a426bfb1419aedd342c7 as europeana_images\page_27_image_3_war.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400

In [59]:
# Search-results URL template for the "family" query. `{page_num}` is filled
# in with str.format() by the scraping loop; the remaining query parameters
# are kept exactly as copied from the site.
base_url_3 = (
    "https://www.europeana.eu/en/search"
    "?page={page_num}"
    "&qf=collection%3Aww1"
    "&qf=TYPE%3A%22IMAGE%22"
    "&qf=contentTier%3A%224%22"
    "&qf=IMAGE_SIZE%3Alarge"
    "&query=family"
    "&reusability=open"
    "&view=grid"
)

In [60]:
# Scrape the "family" result pages and download every thumbnail found.
# This is the last scraping cell, so it is the one that closes the browser.
Type = "family"  # Label appended to every file name written by this cell

try:
    for page in range(1, 35):  # Adjust the range as needed
        print(f"Scraping page {page}")
        url = base_url_3.format(page_num=page)
        driver.get(url)

        # Wait (up to 10 s) for at least one result card before parsing
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "card-img"))
            )
        except Exception as e:
            print(f"Error loading page {page}: {e}")
            continue

        # Scroll to load all images
        scroll_to_load_all(driver)

        # Get page source and parse with BeautifulSoup
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # Find image divs
        image_divs = soup.find_all('div', class_='card-img')

        if not image_divs:
            print(f"No images found on page {page}")
            continue

        # Use ThreadPoolExecutor for concurrent downloads; leaving the `with`
        # block waits for all submitted downloads of this page to finish.
        with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust workers for your system
            for index, div in enumerate(image_divs):
                img_tag = div.find('img')
                if not img_tag:
                    continue
                img_url = img_tag.get('src')
                # An <img> without a src attribute yields None; skip those as
                # well as inline data: placeholders.
                if not img_url or img_url.startswith('data:'):
                    continue
                executor.submit(download_imagetext, img_url, page, index, Type)
finally:
    # Release the browser session even if scraping raised part-way through;
    # previously an exception left the Firefox process running.
    driver.quit()

Scraping page 1
Downloaded https://api.europeana.eu/thumbnail/v3/400/17140e6bf63b49cf693b1262eb78f864 as europeana_images\page_1_image_5_family.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/5c4172e3295f759331eeef6847c121ee as europeana_images\page_1_image_9_family.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/e915c166a804f81f7198a81f8651d038 as europeana_images\page_1_image_1_family.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/beec35695fd40af5bac98d916a15e718 as europeana_images\page_1_image_3_family.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/ee4afa0f9d696113ea95f9d5c5d6935b as europeana_images\page_1_image_7_family.jpg
Downloaded https://api.europeana.eu/thumbnail/v3/400/4c61fee2ab7442517a3ef9acb74fec1a as europeana_images\page_1_image_11_family.jpg
Downloaded https://images.ctfassets.net/i01duvb6kq77/6iXPsH64XWGWfEnIvmMYqd/5a171e0515993b12374a8909d3972ae0/Semesterbild._Familj_p__campingsemester_i_Volkswagenbuss__Folkvagn_.?q=80&fm=jpg&f

InvalidSessionIdException: Message: WebDriver session does not exist, or is not active
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
InvalidSessionIDError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:448:5
assert.that/<@chrome://remote/content/shared/webdriver/Assert.sys.mjs:515:13
assert.session@chrome://remote/content/shared/webdriver/Assert.sys.mjs:37:4
despatch@chrome://remote/content/marionette/server.sys.mjs:315:19
execute@chrome://remote/content/marionette/server.sys.mjs:289:16
onPacket/<@chrome://remote/content/marionette/server.sys.mjs:262:20
onPacket@chrome://remote/content/marionette/server.sys.mjs:263:9
_onJSONObjectReady/<@chrome://remote/content/marionette/transport.sys.mjs:494:20
