In [1]:
import torch
# Quick environment check: is a CUDA device usable, which PyTorch build is
# installed, and which device would be picked by default.
cuda_ok = torch.cuda.is_available()
chosen_device = torch.device("cuda" if cuda_ok else "cpu")
print("CUDA Available:", cuda_ok)
print("Torch version:", torch.__version__)
print("Device:", chosen_device)

CUDA Available: True
Torch version: 2.7.1+cu118
Device: cuda


In [1]:
import torch

# Report the PyTorch build and, when CUDA is usable, the name of GPU 0.
has_cuda = torch.cuda.is_available()
print("PyTorch version:", torch.__version__)
print("CUDA available:", has_cuda)
if has_cuda:
    # Only query the driver for a device name when a CUDA device exists.
    gpu_label = torch.cuda.get_device_name(0)
else:
    gpu_label = "CPU only"
print("Device name:", gpu_label)


PyTorch version: 2.7.1+cu118
CUDA available: True
Device name: NVIDIA GeForce RTX 2050


In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler
import os

# Medicinal plants with multiple queries.
# Each key doubles as the name of the per-plant sub-folder created under
# base_dir; each value is the list of search keywords that are passed, in
# order, to the image crawlers for that plant.
plant_queries = {
    "Tulsi": ["Tulsi plant", "Tulsi leaf", "Holy Basil plant", "Ocimum tenuiflorum"],
    "Neem": ["Neem plant", "Neem leaf", "Azadirachta indica plant"],
    "Aloe_Vera": ["Aloe Vera plant", "Aloe Vera leaf", "Aloe Barbadensis"],
    "Curry_Leaves": ["Curry leaves plant", "Kadi Patta plant", "Murraya koenigii"],
    "Peppermint": ["Peppermint plant", "Mint leaf", "Mentha piperita"],
    "Betel_Leaf": ["Betel leaf plant", "Paan leaf", "Piper betle"],
    "Coriander": ["Coriander plant", "Cilantro leaves", "Coriandrum sativum"],
    "Lemongrass": ["Lemongrass plant", "Cymbopogon plant"],
    "Hibiscus": ["Hibiscus plant", "Hibiscus flower", "Hibiscus rosa-sinensis"],
    "Brahmi": ["Brahmi plant", "Bacopa monnieri plant"],
    "Gotu_Kola": ["Gotu Kola plant", "Centella asiatica"],
    "Henna": ["Henna plant", "Mehndi leaves", "Lawsonia inermis"],
    "Guava_Leaf": ["Guava leaf", "Psidium guajava leaves"],
    "Omavalli": ["Omavalli plant", "Indian Borage", "Karpooravalli plant"],
    "Bay_Leaf": ["Bay leaf plant", "Tej Patta plant", "Laurus nobilis"]
}

# Base dataset directory (relative to the notebook's working directory).
# exist_ok=True keeps this cell re-runnable when the folder already exists.
base_dir = "plants_dataset"
os.makedirs(base_dir, exist_ok=True)

# Function to download from one engine
def download_from_engine(crawler_class, query, plant_dir, start_idx, remaining,
                         max_per_query=120):
    """Run one crawl for ``query`` and report how many images it really saved.

    Args:
        crawler_class: Crawler type to instantiate; it is constructed with
            ``storage={"root_dir": plant_dir}`` and must expose ``crawl()``.
        query: Search keyword handed to the crawler.
        plant_dir: Directory the crawler writes its files into.
        start_idx: Value forwarded as ``file_idx_offset`` so file names from
            successive crawls do not start at the same index.
        remaining: How many more images the caller still wants in total.
        max_per_query: Upper bound on images requested from a single crawl.

    Returns:
        The number of new files that appeared in ``plant_dir`` during the
        crawl, or 0 when ``remaining`` is not positive (no crawl is started).
        Earlier versions returned the *requested* amount, which over-counted
        whenever the engine delivered fewer images than asked for.
    """
    if remaining <= 0:
        return 0
    num_to_download = min(max_per_query, remaining)

    def _count_files():
        # A directory that does not exist yet simply holds zero files.
        if not os.path.isdir(plant_dir):
            return 0
        return sum(1 for entry in os.scandir(plant_dir) if entry.is_file())

    files_before = _count_files()
    crawler = crawler_class(storage={"root_dir": plant_dir})
    crawler.crawl(keyword=query, max_num=num_to_download,
                  file_idx_offset=start_idx, min_size=(128, 128))
    # Count what actually landed on disk rather than trusting the request size.
    return max(0, _count_files() - files_before)

# Loop through plants: for every search keyword try each engine in turn until
# the per-plant image budget is used up.
max_images = 400  # per-plant limit
search_engines = (GoogleImageCrawler, BingImageCrawler)  # tried in this order

for plant, queries in plant_queries.items():
    print(f"\n Collecting images for {plant}...")
    plant_dir = os.path.join(base_dir, plant)
    os.makedirs(plant_dir, exist_ok=True)

    file_idx = 0          # filename offset handed to the next crawl
    total_downloaded = 0  # running sum of the counts download_from_engine reports

    for query in queries:
        if total_downloaded >= max_images:
            break
        print(f" Query: {query}")

        for crawler_class in search_engines:
            remaining = max_images - total_downloaded
            if remaining <= 0:
                # Budget exhausted mid-query; the check at the top of the
                # query loop ends the search for this plant.
                break
            got = download_from_engine(crawler_class, query, plant_dir, file_idx, remaining)
            total_downloaded += got
            file_idx += got

    # Report the limit from the variable so the message cannot drift from it.
    print(f" {plant}: Downloaded {total_downloaded} images (max {max_images})")

print("\n All plant images downloaded!")