In [None]:
pip install icrawler


In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler
from pathlib import Path
import hashlib
import time
import random

# Text file of ``celebrity|status`` lines, appended to by save_progress() and
# read back by load_progress() so a later run can skip finished celebrities.
# The path is relative, so it resolves against the current working directory.
PROGRESS_FILE = "progress.txt"

# ------------------------------
# Utility functions
# ------------------------------
def get_image_hash(image_path: Path) -> str | None:
    try:
        return hashlib.md5(image_path.read_bytes()).hexdigest()
    except Exception:
        return None

def load_existing_hashes(folder: Path) -> set[str]:
    """Collect the content hashes of the images already stored in *folder*.

    Only direct children matching ``*.[jp][pn]g`` are considered; that pattern
    covers ``.jpg`` and ``.png`` but not, for example, ``.jpeg``.  Files for
    which get_image_hash() yields nothing are left out.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        The set of distinct digests found.
    """
    digests = (get_image_hash(candidate) for candidate in folder.glob("*.[jp][pn]g"))
    return {digest for digest in digests if digest}

def count_recent_unique_images(folder: Path, existing_hashes: set[str], seconds: int = 3600) -> int:
    """Count recently modified images in *folder* whose content is not yet known.

    A file is considered when it matches ``*.[jp][pn]g`` and its modification
    time lies within the last *seconds* seconds.  Each previously unseen digest
    is added to *existing_hashes*, so the caller's set is mutated and a second
    call does not count the same content again.

    Args:
        folder: Directory to scan (not recursive).
        existing_hashes: Digests already accounted for; updated in place.
        seconds: Maximum file age, in seconds, for a file to be examined.

    Returns:
        The number of digests added to *existing_hashes* by this call.
    """
    reference_time = time.time()
    added = 0
    for candidate in folder.glob("*.[jp][pn]g"):
        age = reference_time - candidate.stat().st_mtime
        if age > seconds:
            continue  # too old to belong to the current crawl window
        digest = get_image_hash(candidate)
        if not digest or digest in existing_hashes:
            continue  # unreadable file or duplicate content
        existing_hashes.add(digest)
        added += 1
    return added

def save_progress(celebrity: str, status: str):
    """Append one ``celebrity|status`` record to the progress file.

    Args:
        celebrity: Name the record is about.
        status: Outcome to store for that name.
    """
    record = f"{celebrity}|{status}\n"
    with Path(PROGRESS_FILE).open(mode="a", encoding="utf-8") as sink:
        sink.write(record)

def load_progress() -> dict[str, str]:
    """Read the progress file into a ``{celebrity: status}`` mapping.

    Lines without a ``|`` are ignored.  Each remaining line is stripped and
    split at its first ``|``; when a name occurs more than once, the record
    nearest the end of the file wins.

    Returns:
        The mapping, which is empty when the progress file does not exist.
    """
    progress_path = Path(PROGRESS_FILE)
    if not progress_path.exists():
        return {}

    statuses: dict[str, str] = {}
    with progress_path.open("r", encoding="utf-8") as source:
        for raw_line in source:
            name, separator, state = raw_line.strip().partition("|")
            if separator:
                statuses[name] = state
    return statuses

# ------------------------------
# Core image downloading
# ------------------------------
def download_images(celebrity: str, max_images: int = 100) -> bool:
    """Crawl image search engines until *celebrity* has enough unique images.

    Images are stored in ``images/<celebrity>`` (spaces replaced by
    underscores).  Uniqueness is judged by content hash, so the same picture
    saved under two file names counts once.  Up to three passes are made over
    every engine/keyword combination, stopping as soon as the number of new
    unique images reaches the shortfall computed at the start.

    Args:
        celebrity: Name used both for the folder and to build search keywords.
        max_images: Target number of unique images for the folder.

    Returns:
        ``True`` when the folder already held ``max_images`` unique images, or
        when it holds at least 80% of ``max_images`` unique images after
        crawling; ``False`` otherwise.
    """
    folder = Path("images") / celebrity.replace(" ", "_")
    folder.mkdir(parents=True, exist_ok=True)

    existing_hashes = load_existing_hashes(folder)
    print(f"[INFO] {celebrity}: Found {len(existing_hashes)} existing images")

    needed = max(0, max_images - len(existing_hashes))
    if needed == 0:
        print(f"[INFO] {celebrity}: Already have enough images")
        return True

    keywords = [
        f"{celebrity} face", f"{celebrity} portrait", f"{celebrity} headshot",
        f"{celebrity} close up", f"{celebrity} photo", f"{celebrity} still",
        f"{celebrity} candid", f"{celebrity} professional photo",
        f"{celebrity} movie still", f"{celebrity} event photo"
    ]
    # Vary the query order between runs; the same order is reused for every
    # engine and attempt within this call.
    random.shuffle(keywords)

    search_engines = [
        ("Bing", BingImageCrawler),
        ("Baidu", BaiduImageCrawler),
        ("Google", GoogleImageCrawler),
    ]

    total_downloaded = 0
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        if total_downloaded >= needed:
            break
        print(f"[INFO] {celebrity}: Attempt {attempt}")

        for engine_name, Engine in search_engines:
            if total_downloaded >= needed:
                break
            print(f"[INFO] Using {engine_name}")

            for keyword in keywords:
                if total_downloaded >= needed:
                    break
                print(f"   [SEARCH] {keyword}")

                try:
                    crawler = Engine(
                        feeder_threads=1,
                        parser_threads=1,
                        downloader_threads=2,
                        storage={"root_dir": str(folder)},
                    )

                    remaining = needed - total_downloaded
                    try:
                        # Ask for up to twice the shortfall (capped at 50) to
                        # leave room for duplicates and failed downloads.
                        crawler.crawl(
                            keyword=keyword,
                            max_num=min(50, remaining * 2),
                            min_size=(250, 250),
                            file_idx_offset="auto",
                        )
                    except TypeError as e:
                        # NOTE(review): in the captured run output this
                        # TypeError is raised inside icrawler's parser thread
                        # and only logged there, so it may never reach this
                        # handler; it covers the case where it does propagate.
                        if "'NoneType' object is not iterable" in str(e):
                            print(f"   [WARNING] No results for '{keyword}' on {engine_name}, skipping...")
                            continue
                        # Bare raise keeps the original traceback intact.
                        raise

                    # Also records the new digests in existing_hashes, so the
                    # next keyword does not count the same content again.
                    new_unique = count_recent_unique_images(folder, existing_hashes)
                    total_downloaded += new_unique
                    print(f"   [INFO] +{new_unique} unique → {total_downloaded}/{needed}")

                    time.sleep(random.uniform(3, 8))  # polite delay

                except Exception as e:
                    print(f"   [ERROR] {keyword} on {engine_name}: {e}")
                    time.sleep(10)
                    continue

        # Back off only when another attempt will actually follow.
        if total_downloaded < needed and attempt < max_attempts:
            wait_time = attempt * 2
            print(f"[INFO] Waiting {wait_time}s before retry...")
            time.sleep(wait_time)

    # Judge success on unique content, the same measure used for `needed`;
    # a raw file count would let duplicate downloads inflate the result.
    final_count = len(load_existing_hashes(folder))
    success = final_count >= max_images * 0.8
    print(f"[{'SUCCESS' if success else 'PARTIAL'}] {celebrity}: {final_count}/{max_images} images")
    return success

# ------------------------------
# Main script with resume
# ------------------------------
if __name__ == "__main__":
    # Names to collect images for; each gets its own folder under images/.
    celebrities = [
        "Robert Downey Jr", "Chris Evans", "Scarlett Johansson", "Tom Holland", "Zendaya",
        "Emma Watson", "Leonardo DiCaprio", "Brad Pitt", "Angelina Jolie", "Jennifer Lawrence",
        "Taylor Swift", "Ariana Grande", "Justin Bieber", "Selena Gomez", "Billie Eilish",
        "Ed Sheeran", "Beyoncé", "Rihanna", "Drake", "Shawn Mendes",
        "Lionel Messi", "Cristiano Ronaldo", "Neymar Jr", "Kylian Mbappé", "Virat Kohli",
        "Serena Williams", "Roger Federer", "LeBron James", "Michael Jordan", "Usain Bolt",
        "Kim Kardashian", "Kylie Jenner", "Dwayne Johnson", "Kevin Hart", "Will Smith",
        "Priyanka Chopra", "Deepika Padukone", "Shahrukh Khan", "Amitabh Bachchan", "Hrithik Roshan",
        "Barack Obama", "Elon Musk", "Jeff Bezos", "Bill Gates", "Mark Zuckerberg",
        "Oprah Winfrey", "Malala Yousafzai", "Pope Francis", "Jackie Chan"
    ]

    # Outcomes recorded by earlier runs; only "SUCCESS" entries are skipped,
    # so anything recorded as "FAILED" is attempted again.
    progress = load_progress()
    successful, failed = [], []

    for celeb in celebrities:
        if progress.get(celeb) == "SUCCESS":
            # No request is made for a skipped name, so no delay is needed.
            print(f"[SKIP] {celeb}: Already completed.")
            successful.append(celeb)
            continue

        print(f"\n{'='*60}\nDownloading images for: {celeb}\n{'='*60}")
        try:
            if download_images(celeb, max_images=100):
                successful.append(celeb)
                save_progress(celeb, "SUCCESS")
            else:
                failed.append(celeb)
                save_progress(celeb, "FAILED")

            # Pause between celebrities before starting the next batch of queries.
            time.sleep(random.uniform(2, 5))

        except Exception as e:
            print(f"[ERROR] Skipping {celeb}: {e}")
            failed.append(celeb)
            save_progress(celeb, "FAILED")
            time.sleep(60)  # long backoff on error

    # Summary
    print(f"\n{'='*60}\nDOWNLOAD SUMMARY\n{'='*60}")
    print(f"Successful: {len(successful)}/{len(celebrities)}")
    print(f"Failed: {len(failed)}/{len(celebrities)}")
    if failed:
        print("Failed downloads:")
        for f in failed:
            print(f"  - {f}")


[SKIP] Robert Downey Jr: Already completed.
[SKIP] Chris Evans: Already completed.
[SKIP] Scarlett Johansson: Already completed.
[SKIP] Tom Holland: Already completed.
[SKIP] Zendaya: Already completed.
[SKIP] Emma Watson: Already completed.
[SKIP] Leonardo DiCaprio: Already completed.
[SKIP] Brad Pitt: Already completed.
[SKIP] Angelina Jolie: Already completed.
[SKIP] Jennifer Lawrence: Already completed.
[SKIP] Taylor Swift: Already completed.
[SKIP] Ariana Grande: Already completed.
[SKIP] Justin Bieber: Already completed.
[SKIP] Selena Gomez: Already completed.
[SKIP] Billie Eilish: Already completed.
[SKIP] Ed Sheeran: Already completed.
[SKIP] Beyoncé: Already completed.
[SKIP] Rihanna: Already completed.
[SKIP] Drake: Already completed.
[SKIP] Shawn Mendes: Already completed.
[SKIP] Lionel Messi: Already completed.
[SKIP] Cristiano Ronaldo: Already completed.


2025-09-06 15:24:05,909 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:24:05,910 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:24:05,912 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:24:05,914 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:24:05,916 - INFO - icrawler.crawler - starting 2 downloader threads...



Downloading images for: Neymar Jr
[INFO] Neymar Jr: Found 0 existing images
[INFO] Neymar Jr: Attempt 1
[INFO] Using Google
   [SEARCH] Neymar Jr headshot


2025-09-06 15:24:06,974 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+headshot&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:24:10,917 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:24:10,918 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:24:10,919 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:24:10,920 - INFO - downloader - thread downloade

   [INFO] +0 unique → 0/100


2025-09-06 15:24:17,579 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:24:17,580 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:24:17,581 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:24:17,583 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:24:17,584 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr photo


2025-09-06 15:24:18,718 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+photo&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:24:22,587 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:24:22,588 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:24:22,588 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:24:22,589 - INFO - downloader - thread downloader-0

   [INFO] +0 unique → 0/100


2025-09-06 15:24:31,357 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:24:31,357 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:24:31,358 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:24:31,360 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:24:31,361 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr professional photo


2025-09-06 15:24:32,107 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+professional+photo&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:24:36,364 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:24:36,365 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:24:36,365 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:24:36,367 - INFO - downloader - thread

   [INFO] +0 unique → 0/100


2025-09-06 15:24:42,254 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:24:42,255 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:24:42,257 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:24:42,258 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:24:42,259 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr event photo


2025-09-06 15:24:43,333 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+event+photo&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:24:47,261 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:24:47,265 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:24:47,265 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:24:47,269 - INFO - downloader - thread downlo

   [INFO] +0 unique → 0/100


2025-09-06 15:24:54,170 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:24:54,172 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:24:54,173 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:24:54,177 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:24:54,178 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr still


2025-09-06 15:24:54,977 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+still&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:24:59,181 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:24:59,183 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:24:59,184 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:24:59,187 - INFO - downloader - thread downloader-0

   [INFO] +0 unique → 0/100


2025-09-06 15:25:04,944 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:25:04,945 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:25:04,947 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:25:04,949 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:25:04,951 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr candid


2025-09-06 15:25:05,992 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+candid&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:25:09,953 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:25:09,955 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:25:09,955 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:25:09,958 - INFO - downloader - thread downloader-

   [INFO] +0 unique → 0/100


2025-09-06 15:25:18,255 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:25:18,256 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:25:18,257 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:25:18,258 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:25:18,260 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr close up


2025-09-06 15:25:19,138 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+close+up&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:25:23,262 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:25:23,264 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:25:23,264 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:25:23,266 - INFO - downloader - thread downloade

   [INFO] +0 unique → 0/100


2025-09-06 15:25:32,026 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:25:32,027 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:25:32,028 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:25:32,029 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:25:32,031 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr face


2025-09-06 15:25:32,714 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+face&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:25:37,034 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:25:37,036 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:25:37,036 - INFO - downloader - thread downloader-002 exit
2025-09-06 15:25:37,039 - INFO - downloader - thread downloader-00

   [INFO] +0 unique → 0/100


2025-09-06 15:25:41,936 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:25:41,937 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:25:41,938 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:25:41,941 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:25:41,942 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr portrait


2025-09-06 15:25:42,760 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+portrait&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:25:46,947 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:25:46,950 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:25:46,951 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:25:46,953 - INFO - downloader - thread downloade

   [INFO] +0 unique → 0/100


2025-09-06 15:25:52,422 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:25:52,423 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:25:52,424 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:25:52,425 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:25:52,427 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr movie still


2025-09-06 15:25:53,488 - INFO - parser - parsing result page https://www.google.com/search?q=Neymar+Jr+movie+still&ijn=0&start=0&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\ProgramData\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\admin\AppData\Roaming\Python\Python312\site-packages\icrawler\parser.py", line 93, in worker_exec
    for task in self.parse(response, **kwargs):
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not iterable
2025-09-06 15:25:57,430 - INFO - downloader - no more download task for thread downloader-001
2025-09-06 15:25:57,431 - INFO - downloader - no more download task for thread downloader-002
2025-09-06 15:25:57,433 - INFO - downloader - thread downloader-001 exit
2025-09-06 15:25:57,435 - INFO - downloader - thread downlo

   [INFO] +0 unique → 0/100


2025-09-06 15:26:05,646 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:26:05,648 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:26:05,649 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:26:05,652 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:26:05,654 - INFO - icrawler.crawler - starting 2 downloader threads...


[INFO] Using Bing
   [SEARCH] Neymar Jr headshot


2025-09-06 15:26:06,271 - INFO - parser - parsing result page https://www.bing.com/images/async?q=Neymar Jr headshot&first=0
2025-09-06 15:26:06,623 - INFO - downloader - image #1	https://cdn.artphotolimited.com/images/5db6c870bd40b8127669aa45/1000x1000/portrait-neymar-jr-2.jpg
2025-09-06 15:26:06,907 - ERROR - downloader - Response status code 403, file https://images.wallpapersden.com/image/download/neymar-jr_am5oaWWUmZqaraWkpJRobWllrWdpZWU.jpg
2025-09-06 15:26:06,956 - INFO - downloader - image #2	https://i.pinimg.com/originals/a5/a9/7a/a5a97aa466ba033cbc21f4f029667995.jpg
2025-09-06 15:26:07,481 - INFO - downloader - image #3	https://i.pinimg.com/originals/f2/e0/97/f2e0978be00813fc9da05e7e65de71a4.jpg
2025-09-06 15:26:07,617 - INFO - downloader - image #4	https://c8.alamy.com/comp/2BB2XT4/neymar-barcelona-2BB2XT4.jpg
2025-09-06 15:26:08,128 - ERROR - downloader - Response status code 400, file https://media.gettyimages.com/id/1201977756/photo/neymar-junior-of-paris-saint-germain-ge

   [INFO] +50 unique → 50/100


2025-09-06 15:26:33,160 - INFO - icrawler.crawler - start crawling...
2025-09-06 15:26:33,161 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-09-06 15:26:33,162 - INFO - feeder - thread feeder-001 exit
2025-09-06 15:26:33,166 - INFO - icrawler.crawler - starting 1 parser threads...
2025-09-06 15:26:33,167 - INFO - icrawler.crawler - starting 2 downloader threads...


   [SEARCH] Neymar Jr photo


2025-09-06 15:26:33,777 - INFO - parser - parsing result page https://www.bing.com/images/async?q=Neymar Jr photo&first=0
2025-09-06 15:26:34,423 - INFO - downloader - image #1	https://images.ctfassets.net/3mv54pzvptwz/5eTv6hTyA1pqkFClYRn0qt/be626a573cbf1ee7e421b73f87ed6851/20221205_foto_GETTY_neymar_jr_jogo_brasil_x_coreia_copa_do_mundo__209_.jpg
2025-09-06 15:26:36,249 - INFO - downloader - image #2	https://wallpaperaccess.com/full/1259911.jpg
2025-09-06 15:26:38,175 - INFO - downloader - image #3	https://i.pinimg.com/originals/83/0c/4a/830c4ab38044c92c3daa37d70fa9147c.jpg
2025-09-06 15:26:38,416 - INFO - downloader - image #4	https://e00-marca.uecdn.es/assets/multimedia/imagenes/2022/09/13/16630884605041.jpg
2025-09-06 15:26:38,525 - ERROR - downloader - Exception caught when downloading file https://www.enwallpaper.com/wp-content/uploads/2023/10/neymar-jr-wallpaper-2.jpg, error: HTTPSConnectionPool(host='www.enwallpaper.com', port=443): Max retries exceeded with url: /wp-content/up