In [2]:
# !pip install duckduckgo_search


# V1. old Script 

In [None]:
import os
import requests
from PIL import Image
from io import BytesIO
from duckduckgo_search import DDGS

# Function to Search Images
def search_images(query, max_images=100):
    """Look up image URLs for ``query`` through DuckDuckGo image search.

    Args:
        query: Search phrase handed to ``DDGS.images`` as ``keywords``.
        max_images: Upper bound passed to the search as ``max_results``.

    Returns:
        List of the non-empty ``"image"`` values found in the search
        results, in the order the results were returned.
    """
    with DDGS() as client:
        hits = client.images(keywords=query, max_results=max_images)
        # Keep only results that actually carry a non-empty "image" entry.
        image_links = [link for link in (hit.get("image") for hit in hits) if link]
    print(f"Found {len(image_links)} image URLs for '{query}'")
    return image_links

# Car models to collect training images for (one search per entry)
car_models = [
    "Maruti Suzuki Swift",
    "Maruti Suzuki Baleno",
    "Mahindra Thar",
    "Tata Nexon",
    "Hyundai Creta",
    "Hyundai i20",
    "Kia Seltos",
    "Toyota Fortuner",
    "Honda City",
    "BMW X5",
]

# Root folder; each model gets its own sub-folder underneath it
base_dir = "car_dataset/train"
os.makedirs(name=base_dir, exist_ok=True)

# Function to Download Images
def download_car_images(model_name, max_images=100):
    """Search for ``model_name`` and save every decodable hit as a JPEG.

    Images are written to ``base_dir/<slug>/<slug>_<i>.jpg`` where ``slug``
    is the lower-cased model name with spaces replaced by underscores and
    ``i`` is the zero-based position of the URL in the search results.
    Failures for individual URLs are printed and skipped.

    Args:
        model_name: Car model to search for; also used to build file names.
        max_images: Maximum number of URLs requested from ``search_images``.
    """
    slug = model_name.lower().replace(" ", "_")
    target_dir = os.path.join(base_dir, slug)
    os.makedirs(target_dir, exist_ok=True)

    for position, link in enumerate(search_images(model_name, max_images), start=1):
        try:
            payload = requests.get(link, timeout=10).content
            # Normalise every image to RGB before writing it out
            picture = Image.open(BytesIO(payload)).convert("RGB")
            picture.save(os.path.join(target_dir, f"{slug}_{position - 1}.jpg"))
            print(f"Saved image {position} for {model_name}")
        except Exception as e:
            print(f"Failed to download image {position}: {e}")

# Fetch up to 100 images for each entry of car_models
for model in car_models:
    print("\n=== Scraping images for:", model)
    download_car_images(model, 100)


# V2.

In [None]:
import os
import requests
import json
from PIL import Image
from io import BytesIO
from duckduckgo_search import DDGS

# Load Custom Queries
# The file is read as UTF-8 explicitly: without `encoding`, open() falls back to
# the platform default (e.g. cp1252 on Windows), which can mis-decode the JSON.
with open("car_model_queries.json", "r", encoding="utf-8") as f:
    model_queries = json.load(f)

# Image Search
def search_images(query, max_images=100):
    """Return the image URLs DuckDuckGo reports for ``query``.

    Args:
        query: Search phrase, forwarded to ``DDGS.images`` as ``keywords``.
        max_images: Forwarded to the search as ``max_results``.

    Returns:
        List of non-empty ``"image"`` values from the results, in result order.
    """
    with DDGS() as session:
        candidates = (
            entry.get("image")
            for entry in session.images(keywords=query, max_results=max_images)
        )
        # filter(None, ...) drops results whose "image" is missing or empty.
        return list(filter(None, candidates))

# Output root: one sub-folder per car model is created below this directory
base_dir = "car_dataset/train"
os.makedirs(name=base_dir, exist_ok=True)

# Download Function
def download_car_images(model_name, queries, total_images=200):
    """Download up to ``total_images`` pictures for one car model.

    The image budget is split evenly across ``queries``. Every image that
    can be fetched and decoded is converted to RGB and saved as
    ``<slug>_<n>.jpg`` inside ``base_dir/<slug>``, where ``slug`` is the
    lower-cased model name with spaces replaced by underscores and ``n`` is
    the running count of saved images.

    Args:
        model_name: Model name used for the folder and file names.
        queries: Search phrases to run for this model. May be empty, in
            which case the folder is created but nothing is downloaded.
        total_images: Maximum number of images to save across all queries.
    """
    slug = model_name.lower().replace(" ", "_")
    folder = os.path.join(base_dir, slug)
    os.makedirs(folder, exist_ok=True)

    # Without queries there is nothing to search, and the per-query split
    # below would divide by zero.
    if not queries:
        print(f"No search queries for {model_name}; nothing to download.")
        return

    downloaded = 0
    per_query = max(total_images // len(queries), 1)

    for query in queries:
        # Stop issuing searches once the target is met; the inner break alone
        # would still run one search per remaining query.
        if downloaded >= total_images:
            break
        urls = search_images(query, max_images=per_query)
        for url in urls:
            if downloaded >= total_images:
                break
            try:
                response = requests.get(url, timeout=10)
                img = Image.open(BytesIO(response.content)).convert("RGB")
                fname = f"{slug}_{downloaded}.jpg"
                img.save(os.path.join(folder, fname))
                downloaded += 1
                print(f"Saved image {downloaded} for {model_name}")
            except Exception as e:
                # Best effort: a bad URL or undecodable payload is skipped.
                print(f"Failed to download from {url}: {e}")

# Run the downloader once per model from the queries file, capped at 100 images each
for model, queries in model_queries.items():
    print(f"\n=== Downloading for: {model}")
    download_car_images(model, queries, 100)


# V3. final working version

In [None]:
import os
import requests
import json
import time
from PIL import Image
from io import BytesIO
from ddgs import DDGS

#  Load Custom Queries
# The file is read as UTF-8 explicitly: without `encoding`, open() falls back to
# the platform default (e.g. cp1252 on Windows), which can mis-decode the JSON.
with open("car_model_queries.json", "r", encoding="utf-8") as f:
    model_queries = json.load(f)

#  Image Search 
def search_images(query, max_images=100, delay=1.0):
    """Return image URLs for ``query`` from DuckDuckGo image search.

    Args:
        query: Search phrase passed to ``DDGS.images``.
        max_images: Maximum number of results to request (``max_results``).
        delay: Seconds to pause once after the search, to space out
            consecutive searches. A value of 0 (or less) disables the pause.

    Returns:
        List of non-empty ``"image"`` URLs in result order. If the search
        raises, the error is printed and whatever was collected so far
        (possibly an empty list) is returned.
    """
    urls = []
    try:
        with DDGS() as ddgs:
            results = ddgs.images(query, max_results=max_images)
            for result in results:
                url = result.get("image")
                if url:
                    urls.append(url)
    except Exception as e:
        print(f"[ERROR] Image search failed for query '{query}': {e}")
    # Throttle once per search instead of once per result: sleeping inside the
    # result loop cost `delay` seconds for every URL without spacing out the
    # actual search requests any further.
    if delay and delay > 0:
        time.sleep(delay)  # avoid triggering rate limit
    return urls

#  Output root: one sub-folder per car model is created below this directory
base_dir = "car_dataset/train"
os.makedirs(name=base_dir, exist_ok=True)

#  Download Function 
def download_car_images(model_name, queries, total_images=300, per_query_limit=100):
    """Top up the image folder of one car model to ``total_images`` JPEGs.

    ``.jpg`` files already present in ``base_dir/<slug>`` count towards the
    target, so the function can be re-run to resume an interrupted download.
    The remaining budget is split evenly across ``queries``; each fetched
    image is converted to RGB and saved as ``<slug>_<NNN>.jpg``.

    Args:
        model_name: Model name; lower-cased with spaces replaced by
            underscores to form ``slug`` for folder and file names.
        queries: Search phrases to run for this model. May be empty, in
            which case nothing is downloaded.
        total_images: Target number of ``.jpg`` files in the model folder.
        per_query_limit: Upper bound on URLs requested for a single query.
    """
    slug = model_name.lower().replace(" ", "_")
    folder = os.path.join(base_dir, slug)
    os.makedirs(folder, exist_ok=True)

    # Count already downloaded images
    downloaded = len([f for f in os.listdir(folder) if f.endswith(".jpg")])

    if downloaded >= total_images:
        print(f"⏩ Skipping {model_name} — already has {downloaded} images.")
        return

    # Without queries there is nothing to search, and the per-query split
    # below would divide by zero.
    if not queries:
        print(f"⚠ No search queries for {model_name}; nothing to download.")
        return

    per_query = max((total_images - downloaded) // len(queries), 1)

    # File numbering is tracked separately from the image count: if files were
    # deleted by hand, the count is lower than the highest index in use, and
    # naming new files after the count would overwrite existing images.
    next_index = downloaded

    for query in queries:
        # Stop issuing searches once the target is met; the inner break alone
        # would still run one search per remaining query.
        if downloaded >= total_images:
            break
        print(f"🔎 Searching for: {query}")
        urls = search_images(query, max_images=min(per_query, per_query_limit))
        print(f"  → Found {len(urls)} image URLs.")

        for url in urls:
            if downloaded >= total_images:
                break
            try:
                response = requests.get(url, timeout=10)
                img = Image.open(BytesIO(response.content)).convert("RGB")

                # Advance to the first file name that is not taken yet.
                path = os.path.join(folder, f"{slug}_{next_index:03d}.jpg")
                while os.path.exists(path):
                    next_index += 1
                    path = os.path.join(folder, f"{slug}_{next_index:03d}.jpg")

                img.save(path)
                next_index += 1
                downloaded += 1
                print(f"  ✔ Saved image {downloaded} for {model_name}")
            except Exception as e:
                # Best effort: a bad URL or undecodable payload is skipped.
                print(f"  ✖ Failed to download image: {e}")
            # Short pause between downloads
            time.sleep(0.2)

    print(f"✅ Finished downloading for {model_name}: {downloaded} images.\n")


#  Process every model listed in the queries file (target: 300 images each)
for model, queries in model_queries.items():
    print(f"\n📥 Downloading images for: {model}")
    download_car_images(model, queries, 300)

print("🎉 All downloads completed!")


📥 Downloading images for: Tata Motors Harrier
🔎 Searching for: Tata Motors Harrier front view
  → Found 60 image URLs.
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000212CD26DD50>
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000212CD26E700>
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000212CE642020>
  ✔ Saved image 1 for Tata Motors Harrier
  ✔ Saved image 2 for Tata Motors Harrier
  ✔ Saved image 3 for Tata Motors Harrier
  ✔ Saved image 4 for Tata Motors Harrier
  ✔ Saved image 5 for Tata Motors Harrier
  ✔ Saved image 6 for Tata Motors Harrier
  ✔ Saved image 7 for Tata Motors Harrier
  ✔ Saved image 8 for Tata Motors Harrier
  ✔ Saved image 9 for Tata Motors Harrier
  ✔ Saved image 10 for Tata Motors Harrier
  ✔ Saved image 11 for Tata Motors Harrier
  ✔ Saved image 12 for Tata Motors Harrier
  ✔ Saved image 13 for Tata Motors Harrier
  ✔ Saved image 14 for Tata Mo



  ✔ Saved image 42 for Tata Motors Harrier
  ✔ Saved image 43 for Tata Motors Harrier
  ✔ Saved image 44 for Tata Motors Harrier
  ✔ Saved image 45 for Tata Motors Harrier
  ✔ Saved image 46 for Tata Motors Harrier
  ✔ Saved image 47 for Tata Motors Harrier
  ✔ Saved image 48 for Tata Motors Harrier
  ✔ Saved image 49 for Tata Motors Harrier
  ✔ Saved image 50 for Tata Motors Harrier
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000212CE643600>
  ✔ Saved image 51 for Tata Motors Harrier
  ✔ Saved image 52 for Tata Motors Harrier
  ✔ Saved image 53 for Tata Motors Harrier
🔎 Searching for: Tata Motors Harrier side view
  → Found 60 image URLs.
  ✔ Saved image 54 for Tata Motors Harrier
  ✔ Saved image 55 for Tata Motors Harrier
  ✔ Saved image 56 for Tata Motors Harrier
  ✔ Saved image 57 for Tata Motors Harrier
  ✔ Saved image 58 for Tata Motors Harrier
  ✔ Saved image 59 for Tata Motors Harrier
  ✔ Saved image 60 for Tata Motors Harrier
  ✔ Saved i

## Script to Download 300 total images from multiple non-car categories

In [1]:
import os
import requests
import time
from PIL import Image
from io import BytesIO
from ddgs import DDGS

# -------------------------------
# 🔹 Image Search Function
# -------------------------------
def search_images(query, max_images=100, delay=1.0):
    """Return image URLs for ``query`` from DuckDuckGo image search.

    Args:
        query: Search phrase passed to ``DDGS.images``.
        max_images: Maximum number of results to request (``max_results``).
        delay: Seconds to pause once after the search, to space out
            consecutive searches. A value of 0 (or less) disables the pause.

    Returns:
        List of non-empty ``"image"`` URLs in result order. If the search
        raises, the error is printed and whatever was collected so far
        (possibly an empty list) is returned.
    """
    urls = []
    try:
        with DDGS() as ddgs:
            results = ddgs.images(query, max_results=max_images)
            for result in results:
                url = result.get("image")
                if url:
                    urls.append(url)
    except Exception as e:
        print(f"[ERROR] Image search failed for query '{query}': {e}")
    # Throttle once per search instead of once per result: sleeping inside the
    # result loop cost `delay` seconds for every URL without spacing out the
    # actual search requests any further.
    if delay and delay > 0:
        time.sleep(delay)  # avoid rate limit
    return urls

# -------------------------------
# 🔹 Folder Setup
# -------------------------------
not_car_dir = "./Not_Car"
os.makedirs(name=not_car_dir, exist_ok=True)

# -------------------------------
# 🔹 Non-Car Categories
# -------------------------------
# Each entry is used both as the search phrase and as the file-name prefix.
non_car_classes = ["person", "nature", "animal", "architecture", "food", "sports", "technology"]

# Overall target, divided evenly (integer division) between the categories
total_not_car_images = 300
images_per_class = total_not_car_images // len(non_car_classes)

# -------------------------------
# 🔹 Download Function
# -------------------------------
def download_non_car_images(class_name, num_images, start_index):
    """Fetch and store up to ``num_images`` pictures for one non-car class.

    URLs come from ``search_images(class_name, ...)``. Each image that can be
    fetched and decoded is converted to RGB and written to ``not_car_dir`` as
    ``<class>_<NNN>.jpg``, numbered from ``start_index`` upwards. Failures
    for individual URLs are printed and skipped.

    Args:
        class_name: Search phrase; its lower-cased form prefixes file names.
        num_images: Maximum number of images to save (and URLs to request).
        start_index: Number used for the first saved file.

    Returns:
        Number of images actually saved.
    """
    print(f"\n📥 Downloading images for class: {class_name}")
    candidate_urls = search_images(class_name, max_images=num_images)

    saved_count = 0
    for link in candidate_urls:
        if saved_count >= num_images:
            break
        try:
            raw = requests.get(link, timeout=10).content
            picture = Image.open(BytesIO(raw)).convert("RGB")

            # All classes share one folder; the class prefix keeps them apart
            file_name = f"{class_name.lower()}_{start_index + saved_count:03d}.jpg"
            picture.save(os.path.join(not_car_dir, file_name))
            saved_count += 1
            print(f"  ✔ Saved image {start_index + saved_count} as {file_name}")
        except Exception as err:
            print(f"  ✖ Failed to download image: {err}")
        # Short pause after every attempt, successful or not
        time.sleep(0.2)

    return saved_count

# -------------------------------
# 🔹 Start Downloading All Images
# -------------------------------
# Running file number across all classes, so every saved file gets a unique index
global_index = 0
for cls in non_car_classes:
    downloaded = download_non_car_images(cls, images_per_class, global_index)
    global_index += downloaded

# Report the folder the files were actually written to (not_car_dir); the
# previous message named 'car_dataset/Not_Car/', which this cell never uses.
print(f"\n🎉 Downloaded {global_index} images in '{not_car_dir}'")



📥 Downloading images for class: person
  ✔ Saved image 1 as person_000.jpg
  ✔ Saved image 2 as person_001.jpg
  ✔ Saved image 3 as person_002.jpg
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000203CDC871A0>
  ✖ Failed to download image: HTTPSConnectionPool(host='virtualtrexpo.com', port=443): Max retries exceeded with url: /wp-content/uploads/2016/03/person-14-983x1024.jpg (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:997)')))
  ✔ Saved image 4 as person_003.jpg
  ✔ Saved image 5 as person_004.jpg
  ✔ Saved image 6 as person_005.jpg
  ✔ Saved image 7 as person_006.jpg
  ✔ Saved image 8 as person_007.jpg
  ✔ Saved image 9 as person_008.jpg
  ✔ Saved image 10 as person_009.jpg
  ✔ Saved image 11 as person_010.jpg
  ✔ Saved image 12 as person_011.jpg
  ✔ Saved image 13 as person_012.jpg
  ✔ Saved image 14 as person_013.jpg
  ✔ Saved image 15 as person_014.jpg
  ✔ Saved image 16 as person



  ✔ Saved image 28 as person_027.jpg
  ✖ Failed to download image: HTTPSConnectionPool(host='img.ctrlcube.com', port=443): Max retries exceeded with url: /cf5f0a62/optimised/ae4117f34793a1e77f455c37fc45ede05a285fa3.jpg (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x00000203CF6ED090>, 'Connection to img.ctrlcube.com timed out. (connect timeout=10)'))
  ✔ Saved image 29 as person_028.jpg
  ✖ Failed to download image: cannot identify image file <_io.BytesIO object at 0x00000203CF632700>
  ✔ Saved image 30 as person_029.jpg
  ✔ Saved image 31 as person_030.jpg
  ✔ Saved image 32 as person_031.jpg
  ✔ Saved image 33 as person_032.jpg
  ✔ Saved image 34 as person_033.jpg
  ✔ Saved image 35 as person_034.jpg

📥 Downloading images for class: nature
  ✔ Saved image 36 as nature_035.jpg
  ✔ Saved image 37 as nature_036.jpg
  ✔ Saved image 38 as nature_037.jpg
  ✔ Saved image 39 as nature_038.jpg
  ✔ Saved image 40 as nature_039.jpg
  ✔ Saved image 41 as nature_040

### Separate Not_Car images into Training (70%), Validation (16%), and Test (14%) sets.

In [2]:
import os
import random
import shutil

# -------------------------------
# 🔹 Paths
# -------------------------------
source_dir = "./Not_Car"
train_dir = "./split/Training"
val_dir   = "./split/Validation"
test_dir  = "./split/Test"

# Seed for the shuffle below, so the same input folder gives the same split.
SPLIT_SEED = 42

# -------------------------------
# 🔹 Create Output Folders
# -------------------------------
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# -------------------------------
# 🔹 Get All Image Files
# -------------------------------
# os.listdir returns names in arbitrary order, so sort first; combined with
# the fixed seed this makes the shuffle (and therefore the split) reproducible.
all_images = sorted(
    f for f in os.listdir(source_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)
random.Random(SPLIT_SEED).shuffle(all_images)

total = len(all_images)
# int() truncates, so any rounding remainder ends up in the test set.
train_count = int(0.70 * total)
val_count   = int(0.16 * total)
test_count  = total - train_count - val_count

# -------------------------------
# 🔹 Split Images
# -------------------------------
train_images = all_images[:train_count]
val_images   = all_images[train_count:train_count + val_count]
test_images  = all_images[train_count + val_count:]

# -------------------------------
# 🔹 Move Files to Split Folders
# -------------------------------
def move_images(file_list, dest_dir):
    """Move each named file from ``source_dir`` into ``dest_dir``.

    Args:
        file_list: File names (not paths) located directly in ``source_dir``.
        dest_dir: Directory that receives the files under the same names.
    """
    for image_name in file_list:
        shutil.move(
            os.path.join(source_dir, image_name),
            os.path.join(dest_dir, image_name),
        )

# Move each subset into its folder, in the order train -> validation -> test
for subset, destination in (
    (train_images, train_dir),
    (val_images, val_dir),
    (test_images, test_dir),
):
    move_images(subset, destination)

# -------------------------------
# ✅ Done
# -------------------------------
# Summary of how many files ended up in each subset
print(f"✅ Total: {total}")
print(f"✔ Training: {len(train_images)}")
print(f"✔ Validation: {len(val_images)}")
print(f"✔ Test: {len(test_images)}")
print("🎉 Images successfully split and moved!")


✅ Total: 259
✔ Training: 181
✔ Validation: 41
✔ Test: 37
🎉 Images successfully split and moved!
