<h3>Google Image Scraper for Jupyter Notebook</h3>

In [None]:
import os
import json # Added import

from GoogleImageScraper import GoogleImageScraper

from patch import webdriver_executable

def _under_cwd(*parts):
    """Return the normalized path built by joining *parts* onto the current working directory."""
    return os.path.normpath(os.path.join(os.getcwd(), *parts))


# Filesystem locations, all resolved against the current working directory.
webdriver_path = _under_cwd('webdriver', webdriver_executable())
base_image_path = _under_cwd('output')
categories_file_path = _under_cwd('categories.json')

# Settings handed unchanged to every GoogleImageScraper instance created below.
number_of_images = 10
headless = True
min_resolution = (0, 0)
max_resolution = (9999, 9999)

# Load the category -> class-name mapping from the JSON file.
# Any load problem is reported and leaves categories_data as an empty dict,
# so the code that follows always receives a dict.
try:
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so non-ASCII class names are read the same way everywhere.
    with open(categories_file_path, 'r', encoding='utf-8') as f:
        categories_data = json.load(f)
except FileNotFoundError:
    print(f"[ERROR] Categories file not found at {categories_file_path}")
    categories_data = {}
except (json.JSONDecodeError, UnicodeDecodeError):
    # UnicodeDecodeError covers files that are not valid UTF-8 at all;
    # JSONDecodeError covers well-encoded text that is not valid JSON.
    print(f"[ERROR] Error decoding JSON from {categories_file_path}")
    categories_data = {}
else:
    # Valid JSON may still be a list, string or number at the top level;
    # later code calls .keys()/.items() on this value, so reject those here.
    if not isinstance(categories_data, dict):
        print(
            f"[ERROR] Expected a JSON object mapping categories to class lists in "
            f"{categories_file_path}, but got {type(categories_data)}"
        )
        categories_data = {}

# Run the scraper once per (category, class name) pair from categories_data.
# Each class gets its own image directory: <base_image_path>/<category>/<class_name>.
if not categories_data:
    print("[INFO] No categories loaded, scraper will not run.")
else:
    print(f"[INFO] Loaded categories: {list(categories_data)}")

    # Loop-invariant: the same suffix is passed to the scraper for every class,
    # so build it once. (How the scraper uses it is defined in GoogleImageScraper.)
    advanced_suffix = " (filipino OR food OR meal)"

    for category, class_names in categories_data.items():
        print(f"\n[INFO] Processing category: {category}")
        if not isinstance(class_names, list):
            print(f"[WARN] Expected a list of class names for category '{category}', but got {type(class_names)}. Skipping.")
            continue

        for class_name in class_names:
            if not isinstance(class_name, str) or not class_name.strip():
                print(f"[WARN] Invalid class name '{class_name}' in category '{category}'. Skipping.")
                continue

            # Validation above is done on the stripped value, so use the stripped
            # value from here on; otherwise stray whitespace ends up in the
            # directory name and the search key.
            class_name = class_name.strip()

            print(f"[INFO] Processing class: {class_name}")

            # Construct the specific image path: output/<category>/<class_name>
            current_image_path = os.path.join(base_image_path, category, class_name)

            # The search_key is the class_name itself
            search_key = class_name

            print(f"[INFO] Scraper params: webdriver_path='{webdriver_path}', image_path='{current_image_path}', search_key='{search_key}', number_of_images={number_of_images}")

            # Make sure the target directory exists before the scraper is created.
            # exist_ok=True removes the check-then-create race of a separate
            # os.path.exists() test; the prior isdir() check is only used to decide
            # whether to log the creation. If the path exists but is not a
            # directory, makedirs raises FileExistsError here.
            directory_existed = os.path.isdir(current_image_path)
            os.makedirs(current_image_path, exist_ok=True)
            if not directory_existed:
                print(f"[INFO] Created directory: {current_image_path}")

            image_scraper = GoogleImageScraper(
                webdriver_path,
                current_image_path,
                search_key,
                advanced_suffix,
                number_of_images,
                headless,
                min_resolution,
                max_resolution,
            )

            image_urls = image_scraper.find_image_urls()

            # Only attempt to save when the search returned something.
            if image_urls:
                image_scraper.save_images(image_urls, keep_filenames=False)  # keep_filenames can be set as needed
            else:
                print(f"[INFO] No image URLs found for {search_key} in category {category}.")
    print("\n[INFO] All categories and classes processed.")
