# Pinterest Image Scraper v4
Enhanced version with improved error handling, better image quality detection, and web interface support.

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
import requests
import json
from urllib.parse import urlparse
import re
from PIL import Image
from io import BytesIO

In [None]:
def setup_driver(headless=False):
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: When true, ``--headless`` is passed to Chrome as the
            first argument.

    Returns:
        The ``webdriver.Chrome`` instance built from the assembled options.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    )

    # Arguments are collected first so they are registered in a fixed order.
    arguments = ['--headless'] if headless else []
    arguments += [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
        f'--user-agent={user_agent}',
    ]

    opts = Options()
    for argument in arguments:
        opts.add_argument(argument)

    return webdriver.Chrome(options=opts)

In [None]:
def is_valid_image_url(url):
    """Return True when *url* looks like a downloadable content image.

    A URL is rejected when it is empty/None, does not start with ``http``,
    or contains (case-insensitively) any fragment that marks placeholders,
    tracking pixels, loading spinners or user avatars.
    """
    if not url or not url.startswith('http'):
        return False

    # Substrings that identify images we never want to download.
    blocked_fragments = (
        'data:image',
        'placeholder',
        '1x1',
        'loading',
        'avatar',
        'profile',
    )

    lowered = url.lower()
    return not any(fragment in lowered for fragment in blocked_fragments)

In [None]:
def download_image(url, folder_name, image_name, min_size=(200, 200)):
    """Download one image and save it if it passes the size check.

    Args:
        url: Address of the image to fetch.
        folder_name: Directory the file is written to; created if missing.
        image_name: File name (including extension) used inside the folder.
        min_size: ``(width, height)`` minimum; smaller images are skipped.

    Returns:
        True when the image was written to disk, False when it was skipped
        or any error occurred. Errors are printed, never raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            print(f"Failed to download: {url} (Status: {response.status_code})")
            return False

        # Only the dimensions are needed, so the image handle is closed
        # as soon as they have been read.
        try:
            with Image.open(BytesIO(response.content)) as img:
                dimensions = img.size
        except Exception:
            # Pillow raises several unrelated exception types for bad data;
            # all of them mean "not a usable image" here.
            print(f"Skipped {image_name}: Invalid image format")
            return False

        if dimensions[0] < min_size[0] or dimensions[1] < min_size[1]:
            print(f"Skipped {image_name}: Image too small ({dimensions})")
            return False

        # Make the function usable on its own: create the target directory
        # when the caller has not done so. An empty folder name means the
        # current directory, which os.makedirs cannot be asked to create.
        if folder_name:
            os.makedirs(folder_name, exist_ok=True)

        file_path = os.path.join(folder_name, image_name)
        with open(file_path, "wb") as file:
            file.write(response.content)
        print(f"Downloaded: {image_name} ({dimensions})")
        return True
    except Exception as e:
        # Best-effort boundary: one failed download must not abort a batch.
        print(f"Error downloading {url}: {e}")
        return False

In [None]:
def pinterest_image_search_v4(query, num_images=10, save_folder="pinterest_downloads", headless=False):
    """Search Pinterest for *query* and download matching images.

    The search results page is opened in Chrome, every ``<img>`` element
    is inspected, and the page is scrolled to load more results until
    enough images are saved or the scroll limit is reached.

    Args:
        query: Search text; it is percent-encoded before being placed in
            the URL and sanitised before being used in file names.
        num_images: Number of images to download.
        save_folder: Directory the images are written to; created if missing.
        headless: Passed through to ``setup_driver``.

    Returns:
        The number of images successfully downloaded. Errors raised while
        scraping are printed rather than propagated, and the driver is
        always quit.
    """
    # Local import: the file's top-level imports only bring in ``urlparse``.
    from urllib.parse import quote

    os.makedirs(save_folder, exist_ok=True)

    # Characters that are illegal in file names on common platforms (and
    # whitespace) become underscores so that saving does not fail for
    # queries such as "cats/dogs" or "what: why?".
    file_stem = re.sub(r'[\\/:*?"<>|\s]', '_', query)

    driver = setup_driver(headless)
    downloaded_count = 0
    processed_urls = set()

    try:
        print(f"Searching for '{query}' on Pinterest...")
        # Encode every reserved character, not just spaces, so that '&',
        # '#', '+' and '?' in the query do not alter the request.
        search_url = f"https://www.pinterest.com/search/pins/?q={quote(query, safe='')}"
        driver.get(search_url)

        # Wait for initial load
        time.sleep(3)

        # Scroll and collect images
        scroll_attempts = 0
        max_scrolls = 10

        while downloaded_count < num_images and scroll_attempts < max_scrolls:
            # Elements are re-queried on every pass because scrolling adds
            # new ones; processed_urls prevents duplicate downloads.
            images = driver.find_elements(By.TAG_NAME, "img")
            print(f"Found {len(images)} image elements on page")

            for img in images:
                if downloaded_count >= num_images:
                    break

                try:
                    img_url = img.get_attribute("src")
                    if not is_valid_image_url(img_url) or img_url in processed_urls:
                        continue

                    processed_urls.add(img_url)

                    # Pick the extension from the URL, defaulting to jpg.
                    lowered_url = img_url.lower()
                    file_extension = "jpg"
                    if ".png" in lowered_url:
                        file_extension = "png"
                    elif ".webp" in lowered_url:
                        file_extension = "webp"

                    image_name = f"{file_stem}_{downloaded_count + 1}.{file_extension}"

                    if download_image(img_url, save_folder, image_name):
                        downloaded_count += 1

                except Exception as e:
                    # An element can go stale after a scroll; skip it and
                    # keep processing the rest of the page.
                    print(f"Error processing image: {e}")
                    continue

            # Scroll down to load more images
            if downloaded_count < num_images:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)
                scroll_attempts += 1
                print(f"Scrolled {scroll_attempts} times, downloaded {downloaded_count}/{num_images} images")

        print(f"\nDownload complete! Downloaded {downloaded_count} images to '{save_folder}' folder.")

    except Exception as e:
        print(f"Error during scraping: {e}")
    finally:
        driver.quit()

    return downloaded_count

In [None]:
def scrape_with_progress(query, num_images, callback=None):
    """Run the scraper and report progress to an optional callback.

    Args:
        query: Search text passed to ``pinterest_image_search_v4``.
        num_images: Number of images requested.
        callback: Optional callable that receives a dict with the keys
            ``current``, ``total``, ``message`` and ``percentage``. It is
            invoked once before scraping starts and once after it ends.

    Returns:
        The number of images downloaded.
    """
    def progress_callback(current, total, message):
        if callback:
            callback({
                'current': current,
                'total': total,
                'message': message,
                'percentage': int((current / total) * 100) if total > 0 else 0
            })

    # The scraper exposes no per-image hook, so progress is reported at
    # the two points this wrapper can observe: start and completion.
    progress_callback(0, num_images, f"Starting search for '{query}'...")
    downloaded = pinterest_image_search_v4(query, num_images)
    progress_callback(
        downloaded,
        num_images,
        f"Finished: downloaded {downloaded} of {num_images} images",
    )
    return downloaded

In [None]:
# Interactive entry point: prompt for the search parameters, then run the scraper.
if __name__ == "__main__":
    print("Pinterest Image Scraper v4")
    print("=" * 30)

    # A blank query cannot be searched, so stop immediately.
    query = input("Enter search query: ").strip()
    if not query:
        print("Please enter a valid search query.")
        exit()

    # An empty answer falls back to 10; anything non-numeric or
    # non-positive ends the run with a message.
    try:
        num_images = int(input("Enter number of images to download (default 10): ") or "10")
    except ValueError:
        print("Please enter a valid number.")
        exit()
    else:
        if num_images <= 0:
            print("Number of images must be positive.")
            exit()

    folder_name = input("Enter folder name (default 'pinterest_downloads'): ").strip()
    if not folder_name:
        folder_name = "pinterest_downloads"

    print(f"\nStarting download of {num_images} images for '{query}'...")
    downloaded = pinterest_image_search_v4(query, num_images, save_folder=folder_name)
    print(f"\nProcess completed. {downloaded} images downloaded successfully.")