In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Google Images crawler; downloaded files are stored under 'folder/camera'.
google_crawler = GoogleImageCrawler(
    parser_threads=2,
    downloader_threads=4,
    storage={'root_dir': 'folder/camera'}
)

# General-interest topics kept for reference only. They are NOT crawled by
# this cell: the list used to be bound to ``keyword`` and was overwritten
# straight away by the loop variable of the same name, so it never had any
# effect. It now has its own name so it is no longer silently clobbered.
topic_keywords = [
    'salsa and bachata',
    'AI news',
    'cosmos news',
    'crash',
    'books to read',
    'black dresses',
    'movies',
    'coding',
    'painting artworks',
    'birthday party',
    'dancing class',
    'summer activities',
    'winter activities',
    'anime',
    'memes',
    'music',
    'pottery',
    'OOTD',
    'sneakers and accessories',
    'gym workout',
    'yoga',
    'healthy meals',
    'coffee shops',
    'study setups',
    'travel',
    'beaches',
    'mountains',
    'cities',
]

# Keywords that this cell actually crawls, in order.
himym_keywords = [
    'himym',
    'himym cast',
    'himym best moments',
]

for keyword in himym_keywords:
    google_crawler.crawl(
        keyword=keyword, max_num=1000, file_idx_offset='auto')


In [None]:
import os
import requests
from serpapi import GoogleSearch
from urllib.parse import urlparse
from PIL import Image
from io import BytesIO
import time

class GoogleImageAPIScraper:
    """Find images through SerpApi's Google Images search and save them to disk.

    Files are written to ``output_dir`` as ``daily_<n><ext>``, where ``<n>`` is
    the value of ``downloaded_count`` at the time the file is saved.
    """

    # Media types that map to a dedicated file extension; everything else
    # falls back to '.jpg'.
    _EXTENSIONS = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
        'image/webp': '.webp'
    }

    # Hard cap on result pages requested per search so that an unexpected
    # response can never trigger an unbounded number of (billable) API calls.
    MAX_PAGES = 20

    def __init__(self, output_dir="daily_life_api", api_key=None):
        """Create the output directory and remember the API key.

        Args:
            output_dir: Directory the images are saved to (created if missing).
            api_key: SerpApi key; when omitted, the ``SERPAPI_KEY`` environment
                variable is used instead.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.api_key = api_key or os.getenv("SERPAPI_KEY")
        self.downloaded_count = 0

    @classmethod
    def _extension_for(cls, content_type):
        """Return the file extension for a Content-Type header value.

        Parameters such as ``; charset=...`` and letter case are ignored, so
        ``"image/png; charset=binary"`` maps to ``".png"``.
        """
        media_type = content_type.split(';', 1)[0].strip().lower()
        return cls._EXTENSIONS.get(media_type, '.jpg')

    def search_google_images_api(self, query, max_images=100):
        """Collect up to ``max_images`` original-image URLs for ``query``.

        Result pages are requested one after another through the ``ijn``
        parameter until enough URLs are collected, a page contributes nothing
        new, or ``MAX_PAGES`` pages have been requested. Each page is a
        separate API call.

        Args:
            query: Search phrase.
            max_images: Maximum number of URLs to return.

        Returns:
            A list of unique image URLs, in the order the API returned them.
        """
        image_urls = []
        seen = set()

        for page in range(self.MAX_PAGES):
            if len(image_urls) >= max_images:
                break

            params = {
                "q": query,
                "tbm": "isch",
                "ijn": str(page),
                "api_key": self.api_key
            }
            results = GoogleSearch(params).get_dict()
            page_items = results.get('images_results') or []

            # Surface API-side failures instead of silently returning nothing.
            if not page_items and not image_urls and 'error' in results:
                print(f"SerpApi error for '{query}': {results['error']}")

            added = 0
            for img in page_items:
                url = img.get('original')
                if not url or url in seen:
                    continue
                seen.add(url)
                image_urls.append(url)
                added += 1
                if len(image_urls) >= max_images:
                    break

            # An empty page, or one holding only already-seen entries, means
            # there is nothing more to fetch.
            if added == 0:
                break

        return image_urls

    def download_image(self, url):
        """Download and save an image from URL.

        Returns:
            True when the image was saved, False when the response is not an
            image or anything goes wrong (the error is printed, not raised).
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # The context manager releases the streamed connection even when
            # the body is never read (non-image responses).
            with requests.get(url, headers=headers, stream=True, timeout=10) as response:
                response.raise_for_status()

                content_type = response.headers.get('content-type', '')
                if 'image' not in content_type:
                    return False

                payload = response.content

            img = Image.open(BytesIO(payload))

            ext = self._extension_for(content_type)
            filename = f"daily_{self.downloaded_count}{ext}"
            save_path = os.path.join(self.output_dir, filename)

            # Alpha/palette modes cannot be written as JPEG; flatten them.
            if img.mode in ('RGBA', 'P', 'LA', 'PA'):
                img = img.convert('RGB')

            img.save(save_path)
            self.downloaded_count += 1
            print(f"Downloaded {self.downloaded_count}: {save_path}")
            return True

        except Exception as e:
            # Best effort: one broken URL must not abort the whole batch.
            print(f"Failed to download {url}: {str(e)}")
            return False

    def scrape_and_download(self, query, max_images=100):
        """Search for ``query`` and download up to ``max_images`` results.

        Raises:
            ValueError: If no API key was supplied or found in the environment.
        """
        if not self.api_key:
            raise ValueError("API key is required for this method")

        print(f"Searching Google Images via API for: {query}")
        image_urls = self.search_google_images_api(query, max_images)

        print(f"\nFound {len(image_urls)} image URLs. Starting downloads...")
        for url in image_urls[:max_images]:
            self.download_image(url)
            time.sleep(0.5)  # Short delay between downloads

        print(f"\nFinished! Downloaded {self.downloaded_count} images to {self.output_dir}")

if __name__ == "__main__":
    # Get an API key from https://serpapi.com/ and expose it to the notebook
    # through the SERPAPI_KEY environment variable.
    # SECURITY: never paste the key into the notebook. A key that has ever
    # been saved in this file must be treated as leaked: revoke it in the
    # SerpApi dashboard and generate a new one.
    api_key = os.getenv("SERPAPI_KEY")

    scraper = GoogleImageAPIScraper(output_dir="daily_life_api", api_key=api_key)
    # Raises ValueError when no key is configured.
    scraper.scrape_and_download("daily life photography", max_images=500)

In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Google Images crawl whose results are stored under 'sources/photos/extra'.
google_crawler = GoogleImageCrawler(
    storage={'root_dir': 'sources/photos/extra'},
    downloader_threads=10,
    parser_threads=2,
)

# A single search phrase; file_idx_offset='auto' is passed so numbering
# continues from whatever the storage folder already holds.
for keyword in ("cosmos Painting",):
    google_crawler.crawl(keyword=keyword, file_idx_offset='auto', max_num=1000)

In [None]:
import os

def add_prefix_to_images(folder_path, prefix="screenshot"):
    """Rename every image under ``folder_path`` to ``<prefix>_<original name>``.

    The folder is walked recursively. Only files whose name ends with a known
    image extension (case-insensitive) are touched. The operation is safe to
    repeat: files whose name already starts with ``<prefix>_`` are left alone,
    and a file is never renamed onto an existing one.

    Args:
        folder_path: Root directory to scan.
        prefix: Text placed, followed by an underscore, before each file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')
    marker = f"{prefix}_"

    # Traverse through all files in the folder
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            if not filename.lower().endswith(image_extensions):
                continue

            # Already prefixed (e.g. by a previous run of this cell): skip so
            # the prefix is not stacked a second time.
            if filename.startswith(marker):
                continue

            new_filename = f"{marker}{filename}"
            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, new_filename)

            # os.rename would overwrite (POSIX) or fail (Windows) here.
            if os.path.exists(new_path):
                print(f"Skipped: {filename} ({new_filename} already exists)")
                continue

            os.rename(old_path, new_path)
            print(f"Renamed: {filename} -> {new_filename}")

# Usage example: prefix the images stored in the 'sources/photos/extra' folder.
# The location is given relative to the notebook's working directory rather
# than as a machine-specific absolute path, so the cell also works on other
# checkouts. NOTE(review): assumes the notebook is run from the project root.
folder_path = os.path.join("sources", "photos", "extra")
if os.path.isdir(folder_path):
    add_prefix_to_images(folder_path)
else:
    print(f"Image folder not found: {os.path.abspath(folder_path)}")

In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Bing crawl for the "freedom day" collection, stored under
# 'sources/photos/freedom'. Despite its name, ``google_crawler`` is a
# BingImageCrawler in this cell.
google_crawler = BingImageCrawler(
    storage={'root_dir': 'sources/photos/freedom'},
    downloader_threads=1,
    parser_threads=1,
)

# All search phrases in crawl order. The strings are kept exactly as they
# were written, including the leading space some of them carry.
for keyword in (
    "national freedom day celebration",
    "independence day parade",
    " flag waving celebration",
    "freedom day fireworks",
    "Damascus freedom day event",
    " cultural celebration",
    "national day military parade",
    " traditional dance celebration",
    "freedom day concert",
    "historical freedom monuments",
    " people celebrating independence",
    "freedom day decorations",
):
    google_crawler.crawl(keyword=keyword, file_idx_offset='auto', max_num=1500)

2025-07-10 04:48:21,509 - INFO - icrawler.crawler - start crawling...
2025-07-10 04:48:21,509 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-07-10 04:48:21,520 - INFO - icrawler.crawler - starting 1 parser threads...
2025-07-10 04:48:21,525 - INFO - icrawler.crawler - starting 1 downloader threads...
2025-07-10 04:48:26,532 - INFO - downloader - downloader-001 is waiting for new download tasks
2025-07-10 04:48:31,037 - INFO - parser - parsing result page https://www.bing.com/images/async?q=national freedom day celebration&first=0
2025-07-10 04:48:40,573 - ERROR - downloader - Exception caught when downloading file https://www.greetingswishes.com/wp-content/uploads/2023/01/National-Freedom-Day-Images.jpg, error: HTTPSConnectionPool(host='www.greetingswishes.com', port=443): Read timed out. (read timeout=5), remaining retry times: 2
2025-07-10 04:48:48,316 - INFO - downloader - image #1	https://www.greetingswishes.com/wp-content/uploads/2023/01/National-Freedom-Day-Images.

2025-07-10 05:05:09,181 - INFO - parser - parsing result page https://www.bing.com/images/async?q=freedom day decorations&first=620
2025-07-10 05:05:11,200 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:13,214 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:15,216 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:17,231 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:19,232 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:21,248 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:23,251 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:25,251 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:27,267 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:29,282 - INFO - parser - parser-001 is waiting for new page urls
2025-07-10 05:05:31,286 - INFO - parser - parser

In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Bing crawl for black-and-white / monochrome photography, stored under
# 'sources/photos/media'. ``google_crawler`` is a BingImageCrawler here.
google_crawler = BingImageCrawler(
    storage={'root_dir': 'sources/photos/media'},
    downloader_threads=1,
    parser_threads=1,
)

# Phrases grouped by theme; groups and phrases are crawled in listed order.
for keyword_group in (
    # fashion and portraits
    ("black and white fashion photography",
     "monochrome portrait studio lighting",
     "black and white celebrity portraits"),
    # industry and vehicles
    ("black and white industrial photography",
     "monochrome machinery details",
     "black and white automotive photography"),
    # animals
    ("black and white wildlife photography",
     "monochrome animal portraits",
     "black and white underwater photography"),
    # sports and dance
    ("black and white sports photography",
     "monochrome action shots",
     "black and white dance photography"),
):
    for keyword in keyword_group:
        google_crawler.crawl(
            keyword=keyword,
            max_num=1000,
            file_idx_offset='auto',
        )

In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Bing crawl for dog pictures, stored under 'sources/photos/media'.
# ``google_crawler`` is a BingImageCrawler in this cell.
google_crawler = BingImageCrawler(
    storage={'root_dir': 'sources/photos/media'},
    parser_threads=1,
    downloader_threads=1,
)

# One search phrase per breed, crawled from top to bottom.
for keyword in [
    "lovely golden retriever",
    "cute labrador puppy",
    "adorable beagle dog",
    "playful corgi dog",
    "happy husky puppy",
    "smiling poodle dog",
    "fluffy samoyed dog",
    "tiny chihuahua puppy",
    "cuddly shih tzu",
    "dachshund puppy playing",
    "border collie smiling",
    "pomeranian dog cute",
]:
    google_crawler.crawl(max_num=1000, keyword=keyword, file_idx_offset='auto')

In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler

# Bing crawl for healthy-food pictures, stored under 'sources/photos/media'.
# ``google_crawler`` is a BingImageCrawler in this cell.
google_crawler = BingImageCrawler(
    downloader_threads=1,
    parser_threads=1,
    storage={'root_dir': 'sources/photos/media'},
)

# Search phrases in crawl order.
for keyword in (
    "lean protein meals",
    "whole grain foods",
    "low fat dairy",
    "superfoods",
    "plant based diet",
    "organic produce",
    "balanced meals",
    "heart healthy foods",
    "antioxidant rich foods",
):
    google_crawler.crawl(file_idx_offset='auto', keyword=keyword, max_num=1000)