##Daraz Headphone Image Scraper using Selenium and WebDriver Manager
##Author: M. Shariful Islam
##Date: May 2025
##Description: This script scrapes product images (headphones) from daraz.com.bd 
##using Selenium with Chrome and saves them locally.
##It is designed for educational use and GitHub submission.

# In [5]:
import os
import time
from urllib.parse import quote_plus

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from tqdm import tqdm
from webdriver_manager.chrome import ChromeDriverManager

# ----- CONFIGURATION -----
# Product keyword to search for.
search_term = "headphone"
# Maximum number of images to download.
max_images = 50
# Output folder, named after the search keyword.
folder_name = "daraz_images_" + search_term

# ----- SETUP SELENIUM -----
options = Options()
for chrome_flag in (
    "--headless",               # run without a visible browser window
    "--no-sandbox",             # disable the sandbox (for Linux/Colab)
    "--disable-dev-shm-usage",  # avoid shared-memory issues
):
    options.add_argument(chrome_flag)

# Let webdriver_manager install the matching ChromeDriver and hand its
# location to Selenium.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=Service(chromedriver_path), options=options)
# -----------------------------
# OPEN DARAZ AND SEARCH
# -----------------------------

# Build the search URL. The keyword is percent-encoded so that spaces, "&",
# "#", "+" and non-ASCII characters stay inside the q= parameter instead of
# being read as URL syntax.
url = f"https://www.daraz.com.bd/catalog/?q={quote_plus(search_term)}"
driver.get(url)
time.sleep(5)  # Wait for JavaScript to load

# Scroll to the bottom of the page to load more content, then give the
# newly requested content a moment to arrive.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(3)

# ----- SCRAPE IMAGES -----
# Collect every <img> element on the loaded page and save up to max_images of
# them into folder_name as image_<n>.jpg. The whole phase runs inside
# try/finally so the browser is shut down even if scraping fails part-way.
count = 0
try:
    images = driver.find_elements(By.TAG_NAME, "img")

    # Create output folder
    os.makedirs(folder_name, exist_ok=True)

    print(f"🖼️ Found {len(images)} image tags. Saving up to {max_images} images...")

    # URLs already attempted, so a URL repeated across tags is fetched once.
    seen_urls = set()

    for img in tqdm(images):
        # Check the limit before downloading so max_images <= 0 saves nothing.
        if count >= max_images:
            break
        try:
            # src may hold a non-downloadable placeholder (e.g. a data: URI)
            # while data-src holds the real address, so take the first
            # candidate that is an actual http(s) URL instead of the first
            # truthy one.
            img_url = None
            for candidate in (img.get_attribute("src"), img.get_attribute("data-src")):
                if not candidate:
                    continue
                if candidate.startswith("//"):
                    # Protocol-relative URL: give it an explicit scheme.
                    candidate = "https:" + candidate
                if candidate.startswith("http"):
                    img_url = candidate
                    break

            if img_url is None or img_url in seen_urls:
                continue
            seen_urls.add(img_url)

            # A timeout keeps one unresponsive host from hanging the run;
            # raise_for_status keeps HTTP error pages from being saved as
            # images.
            response = requests.get(img_url, timeout=10)
            response.raise_for_status()

            # Save to file
            with open(os.path.join(folder_name, f"image_{count}.jpg"), "wb") as f:
                f.write(response.content)

            count += 1
            time.sleep(1)  # Be polite to the server
        except Exception as e:
            # Deliberately broad per-image boundary: a network error, a stale
            # element or a disk error on one image is reported and skipped
            # rather than aborting the remaining downloads.
            print(f" Error downloading image {count}: {e}")
finally:
    # Close the browser
    driver.quit()

# Final message
print(f"\n Done! {count} images saved in folder: {folder_name}/")

# Sample output from a previous run:
#
# 🖼️ Found 66 image tags. Saving up to 50 images...
#
# 100%|██████████| 66/66 [00:34<00:00,  1.89it/s]
#
#  Done! 29 images saved in folder: daraz_images_headphone/
