In [24]:
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def read_links_from_file(file_path: str):
    with open(file_path, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]

def scrape_aldi_product(driver, url):
    driver.get(url)
    time.sleep(2)  # Let JS content load

    try:
        wait = WebDriverWait(driver, 10)

        title = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "h1.headline"))).text.strip()
        image = driver.find_element(By.CSS_SELECTOR, "img.productslider-image").get_attribute("src")
        amount = driver.find_element(By.CSS_SELECTOR, "p.product__details__amount").text.strip()

        price_main = driver.find_element(By.CSS_SELECTOR, "span.price__main").text.strip()
        try:
            price_decimals = driver.find_element(By.CSS_SELECTOR, "span.price__decimals").text.strip()
            price = f"{price_main},{price_decimals} €"
        except:
            price = f"{price_main} €"

        return {
            "url": url,
            "title": title,
            "image": image,
            "amount_info": amount,
            "price": price
        }

    except Exception as e:
        print(f"[ERROR] Failed to scrape {url}: {e}")
        return None

def main():
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    urls = read_links_from_file("aldi-links.txt")
    results = []

    for url in urls:
        print(f"[INFO] Scraping: {url}")
        product = scrape_aldi_product(driver, url)
        if product:
            results.append(product)

    driver.quit()

    with open("aldi-products.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print("[DONE] Saved to aldi-products.json")

if __name__ == "__main__":
    main()


[INFO] Scraping: https://www.aldi-nord.de/produkt/natives-olivenoel-extra-0812-0-0.article.html#/sortiment/nahrungsmittel/backzutaten/fette-oele-essige
[ERROR] Failed to scrape https://www.aldi-nord.de/produkt/natives-olivenoel-extra-0812-0-0.article.html#/sortiment/nahrungsmittel/backzutaten/fette-oele-essige: Message: 
Stacktrace:
0   chromedriver                        0x00000001030343e0 cxxbridge1$str$ptr + 2829900
1   chromedriver                        0x000000010302c6a8 cxxbridge1$str$ptr + 2797844
2   chromedriver                        0x0000000102b69fbc cxxbridge1$string$len + 90140
3   chromedriver                        0x0000000102bb11bc cxxbridge1$string$len + 381468
4   chromedriver                        0x0000000102bf2044 cxxbridge1$string$len + 647332
5   chromedriver                        0x0000000102ba53f8 cxxbridge1$string$len + 332888
6   chromedriver                        0x0000000102ff8804 cxxbridge1$str$ptr + 2585200
7   chromedriver                        0x

KeyboardInterrupt: 