In [1]:
# Main version: Scrapes Wallapop without saving images
# This version only extracts product data (title, price, link) and saves to CSV
# Faster execution since no image downloading is performed

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# Configure Chrome for the scraping session.
options = Options()
# options.add_argument('--headless')  # Run in background (uncomment if needed)
# Flags are applied in the order listed; the user-agent override comes last.
for chrome_flag in (
    '--disable-gpu',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
):
    options.add_argument(chrome_flag)

# Start the browser; ChromeDriverManager().install() supplies the path handed to Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Parameters - modify these as needed
search_term = "vitus"
min_price = 300
max_price = 1000
# NOTE: `location` is not referenced when the URL is built; the search area is
# determined by the latitude/longitude/distance values passed to build_search_url.
location = "madrid"


def build_search_url(keywords, min_sale_price, max_sale_price,
                     latitude=40.4168, longitude=-3.7038, distance=50000):
    """Return the Wallapop search URL for the given filters.

    Every value is percent-encoded with ``urllib.parse.urlencode`` so that
    keywords containing spaces, ``&``, ``#`` or non-ASCII characters cannot
    corrupt the query string.

    Args:
        keywords: Free-text search term.
        min_sale_price: Lower price bound sent as ``min_sale_price``.
        max_sale_price: Upper price bound sent as ``max_sale_price``.
        latitude: Sent as ``latitude`` (default presumably matches `location`).
        longitude: Sent as ``longitude`` (default presumably matches `location`).
        distance: Sent as ``distance``; units are whatever the site expects.

    Returns:
        The complete search URL as a string.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import urlencode

    query = urlencode({
        "keywords": keywords,
        "min_sale_price": min_sale_price,
        "max_sale_price": max_sale_price,
        "latitude": latitude,
        "longitude": longitude,
        "distance": distance,
    })
    return f"https://es.wallapop.com/app/search?{query}"


# Build the URL with filters
url = build_search_url(search_term, min_price, max_price)

print(f"Opening URL: {url}")

# Collected product records (one dict per listing). Defined before the try
# block so the name always exists, even if the browser session fails early.
items = []

# Everything that talks to the browser runs inside try/finally so the Chrome
# process is always shut down, even when a step raises unexpectedly or the
# cell is interrupted.
try:
    driver.get(url)

    # Wait for the page to load
    time.sleep(5)

    # Accept cookies if the dialog appears
    try:
        cookie_button = driver.find_element(By.ID, "onetrust-accept-btn-handler")
        cookie_button.click()
        print("Accepted cookies")
        time.sleep(2)
    except NoSuchElementException:
        print("No cookie dialog found or already accepted")
    except Exception as e:
        # Best effort: a failed click must not abort the scrape, but unlike a
        # bare `except:` this does not swallow KeyboardInterrupt/SystemExit.
        print(f"Could not accept cookies: {e}")

    # Take a screenshot to debug
    driver.save_screenshot("wallapop_page.png")
    print("Screenshot saved as wallapop_page.png")

    # Main scraping logic - extract product data without images
    try:
        print("Looking for product links (anchor tags)...")
        # Wait for product links to appear
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.ItemCardList__item")))

        # Get all product links
        product_links = driver.find_elements(By.CSS_SELECTOR, "a.ItemCardList__item")
        print(f"Found {len(product_links)} product links")

        # Extract data from each product link
        for link in product_links:
            try:
                # Get the href attribute from the anchor tag
                href = link.get_attribute("href")

                # Get the title from the title attribute
                title = link.get_attribute("title")

                # Find the price element within this link
                price_element = link.find_element(By.CSS_SELECTOR, ".ItemCard__price")
                # Price is kept as the displayed text minus the euro sign
                price = price_element.text.replace('€', '').strip()

                items.append({
                    "title": title,
                    "price": price,
                    "link": href
                })

                print(f"{title} - {price}€ - {href}")
            except Exception as e:
                # One broken card should not stop the remaining cards
                print(f"Error extracting product data: {e}")

    except TimeoutException:
        print("Couldn't find product links with a.ItemCardList__item selector")

    print(f"\nTotal results: {len(items)}")

    # Convert to DataFrame and save
    if items:
        df = pd.DataFrame(items)
        df.to_csv("wallapop_results_no_images.csv", index=False)
        print("Results saved to wallapop_results_no_images.csv")
    else:
        print("No items found to save.")

    # Pause to keep the browser open for inspection if you need to debug
    print("\nBrowser will remain open for 10 seconds for inspection...")
    time.sleep(10)
finally:
    # Close the browser
    driver.quit()

Opening URL: https://es.wallapop.com/app/search?keywords=vitus&min_sale_price=300&max_sale_price=1000&latitude=40.4168&longitude=-3.7038&distance=50000
Accepted cookies
Screenshot saved as wallapop_page.png
Looking for product links (anchor tags)...
Found 40 product links
Bicicleta Vitus Aluminio - Talla L - 380,00€ - https://es.wallapop.com/item/bicicleta-vitus-aluminio-talla-l-1142233307
Bicicleta carretera Vitus 992 - 300,00€ - https://es.wallapop.com/item/bicicleta-carretera-vitus-992-1141922748
Bicicleta VITUS  - 435,00€ - https://es.wallapop.com/item/bicicleta-vitus-979-1124219046
bicicleta carretera vitus 992 - 909,30€ - https://es.wallapop.com/item/bicicleta-carretera-vitus-992-1128756842
Bicicleta carretera Vitus 979 - 300,00€ - https://es.wallapop.com/item/bicicleta-carretera-vitus-1134190027
Bicicleta Vitus 992 Ovoid - 700,00€ - https://es.wallapop.com/item/bicicleta-vitus-992-ovoid-1135478467
Bicicleta Vitus Dural, equipada con Campagnolo - 550,00€ - https://es.wallapop.com

In [2]:
# Main version: Scrapes Wallapop and saves images
# This version extracts product data AND downloads product images to local storage
# Takes longer to execute but provides complete product information with images

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import os
import requests
from urllib.parse import urlparse
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# Make sure the folder that receives downloaded product images exists.
images_dir = "product_images"
try:
    os.makedirs(images_dir)
except FileExistsError:
    # Path is already present: nothing to create and nothing to report.
    pass
else:
    print(f"Created directory {images_dir} for product images")

# Configure Chrome for the scraping session.
options = Options()
# options.add_argument('--headless')  # Run in background (uncomment if needed)
# Flags are applied in the order listed; the user-agent override comes last.
for chrome_flag in (
    '--disable-gpu',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
):
    options.add_argument(chrome_flag)

# Start the browser; ChromeDriverManager().install() supplies the path handed to Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Parameters - modify these as needed
search_term = "vitus"
min_price = 300
max_price = 1000
# NOTE: `location` is not referenced when the URL is built; the search area is
# determined by the latitude/longitude/distance values passed to build_search_url.
location = "madrid"


def build_search_url(keywords, min_sale_price, max_sale_price,
                     latitude=40.4168, longitude=-3.7038, distance=50000):
    """Return the Wallapop search URL for the given filters.

    Every value is percent-encoded with ``urllib.parse.urlencode`` so that
    keywords containing spaces, ``&``, ``#`` or non-ASCII characters cannot
    corrupt the query string.

    Args:
        keywords: Free-text search term.
        min_sale_price: Lower price bound sent as ``min_sale_price``.
        max_sale_price: Upper price bound sent as ``max_sale_price``.
        latitude: Sent as ``latitude`` (default presumably matches `location`).
        longitude: Sent as ``longitude`` (default presumably matches `location`).
        distance: Sent as ``distance``; units are whatever the site expects.

    Returns:
        The complete search URL as a string.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import urlencode

    query = urlencode({
        "keywords": keywords,
        "min_sale_price": min_sale_price,
        "max_sale_price": max_sale_price,
        "latitude": latitude,
        "longitude": longitude,
        "distance": distance,
    })
    return f"https://es.wallapop.com/app/search?{query}"


# Build the URL with filters
url = build_search_url(search_term, min_price, max_price)

print(f"Opening URL: {url}")

# Collected product records (one dict per listing). Defined before the try
# block so the name always exists, even if the browser session fails early.
items = []

# Everything that talks to the browser runs inside try/finally so the Chrome
# process is always shut down, even when a step raises unexpectedly or the
# cell is interrupted.
try:
    driver.get(url)

    # Wait for the page to load
    time.sleep(5)

    # Accept cookies if the dialog appears
    try:
        cookie_button = driver.find_element(By.ID, "onetrust-accept-btn-handler")
        cookie_button.click()
        print("Accepted cookies")
        time.sleep(2)
    except NoSuchElementException:
        print("No cookie dialog found or already accepted")
    except Exception as e:
        # Best effort: a failed click must not abort the scrape, but unlike a
        # bare `except:` this does not swallow KeyboardInterrupt/SystemExit.
        print(f"Could not accept cookies: {e}")

    # Take a screenshot to debug
    driver.save_screenshot("wallapop_page.png")
    print("Screenshot saved as wallapop_page.png")

    # Main scraping logic - extract product data and download images
    try:
        print("Looking for product links (anchor tags)...")
        # Wait for product links to appear
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.ItemCardList__item")))

        # Get all product links
        product_links = driver.find_elements(By.CSS_SELECTOR, "a.ItemCardList__item")
        print(f"Found {len(product_links)} product links")

        # Extract data from each product link and download images
        for i, link in enumerate(product_links):
            try:
                # Get the href attribute from the anchor tag
                href = link.get_attribute("href")

                # Get the title from the title attribute
                title = link.get_attribute("title")

                # Find the price element within this link
                price_element = link.find_element(By.CSS_SELECTOR, ".ItemCard__price")
                # Price is kept as the displayed text minus the euro sign
                price = price_element.text.replace('€', '').strip()

                # Find the image element and get its source URL
                image_url = None
                try:
                    # Use the first <img> found inside this product link
                    img_element = link.find_element(By.CSS_SELECTOR, "img")
                    image_url = img_element.get_attribute("src")
                except NoSuchElementException:
                    print(f"Could not find image for {title}")

                # Download the image if URL was found
                image_filename = None
                if image_url:
                    try:
                        # Create a safe filename from the title. `title or ""`
                        # guards against a missing title attribute (None), which
                        # would otherwise raise and skip a valid image.
                        safe_title = "".join(
                            c if c.isalnum() or c in (' ', '-', '_') else '_'
                            for c in (title or "")
                        )
                        safe_title = safe_title[:50]  # Limit length

                        # Extract extension from URL or default to .jpg
                        parsed_url = urlparse(image_url)
                        path = parsed_url.path
                        extension = os.path.splitext(path)[1]
                        if not extension or len(extension) > 5:  # If no extension or it looks invalid
                            extension = ".jpg"

                        # Create filename; the 1-based index keeps names unique
                        # when several listings share a title
                        image_filename = f"{i+1}_{safe_title}{extension}"
                        image_path = os.path.join(images_dir, image_filename)

                        # Download image
                        response = requests.get(image_url, timeout=10)
                        if response.status_code == 200:
                            with open(image_path, 'wb') as f:
                                f.write(response.content)
                            print(f"Downloaded image: {image_filename}")
                        else:
                            print(f"Failed to download image: {response.status_code}")
                            image_filename = None
                    except Exception as e:
                        # A failed download still records the product itself
                        print(f"Error downloading image: {e}")
                        image_filename = None

                items.append({
                    "title": title,
                    "price": price,
                    "link": href,
                    "image_url": image_url,
                    "image_filename": image_filename
                })

                print(f"{title} - {price}€ - {href} - Image: {'Yes' if image_url else 'No'}")

            except Exception as e:
                # One broken card should not stop the remaining cards
                print(f"Error extracting product data: {e}")

    except TimeoutException:
        print("Couldn't find product links with a.ItemCardList__item selector")

    print(f"\nTotal results: {len(items)}")

    # Convert to DataFrame and save
    if items:
        df = pd.DataFrame(items)
        df.to_csv("wallapop_results_with_images.csv", index=False)
        print("Results saved to wallapop_results_with_images.csv")
    else:
        print("No items found to save.")

    # Pause to keep the browser open for inspection
    print("\nBrowser will remain open for 10 seconds for inspection...")
    time.sleep(10)
finally:
    # Close the browser
    driver.quit()

Created directory product_images for product images
Opening URL: https://es.wallapop.com/app/search?keywords=vitus&min_sale_price=300&max_sale_price=1000&latitude=40.4168&longitude=-3.7038&distance=50000
Accepted cookies
Screenshot saved as wallapop_page.png
Looking for product links (anchor tags)...
Found 40 product links
Downloaded image: 1_Bicicleta Vitus Aluminio - Talla L.jpg
Bicicleta Vitus Aluminio - Talla L - 380,00€ - https://es.wallapop.com/item/bicicleta-vitus-aluminio-talla-l-1142233307 - Image: Yes
Downloaded image: 2_Bicicleta carretera Vitus 992.jpg
Bicicleta carretera Vitus 992 - 300,00€ - https://es.wallapop.com/item/bicicleta-carretera-vitus-992-1141922748 - Image: Yes
Downloaded image: 3_Bicicleta VITUS .jpg
Bicicleta VITUS  - 435,00€ - https://es.wallapop.com/item/bicicleta-vitus-979-1124219046 - Image: Yes
Downloaded image: 4_bicicleta carretera vitus 992.jpg
bicicleta carretera vitus 992 - 909,30€ - https://es.wallapop.com/item/bicicleta-carretera-vitus-992-112875