In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import requests, os, time

def download_istock_images(query, max_images=30, folder='istock_images'):
    """Download images found on an iStock search results page.

    The search page is rendered in headless Chrome, its HTML is parsed, and
    every ``<img>`` whose ``src`` contains ``media.istockphoto.com`` is fetched
    until ``max_images`` files have been saved.

    Parameters:
    - query: Search phrase. It is URL-encoded for the request and used (with
      spaces replaced by underscores) as the file name prefix.
    - max_images: Maximum number of images to save (default is 30).
    - folder: Output directory; created if missing (default 'istock_images').

    Files are written as ``<folder>/<query>_<index>.jpg`` regardless of the
    actual image format returned by the server. Progress is printed; nothing
    is returned.
    """
    # Local import so the function does not depend on an edit to the import cell.
    from urllib.parse import quote

    os.makedirs(folder, exist_ok=True)

    # Percent-encode the whole phrase (not only spaces) so characters such as
    # '&' or '#' cannot break the query string.
    url = (
        "https://www.istockphoto.com/search/2/image"
        f"?phrase={quote(query, safe='')}"
        "&alloweduse=availableforalluses&mediatype=photography&sort=best"
    )

    # Setup headless Chrome browser
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    # Always shut the browser down, even if loading the page fails; otherwise
    # a Chrome process is leaked on every error.
    try:
        driver.get(url)
        time.sleep(5)  # wait for page to load JS
        page_html = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(page_html, 'html.parser')
    file_stem = query.replace(' ', '_')
    count = 0

    for img in soup.find_all('img'):
        # Stop scanning as soon as the quota is met.
        if count >= max_images:
            break

        src = img.get('src')
        if not src or 'media.istockphoto.com' not in src:
            continue

        try:
            # A timeout prevents one stalled request from hanging the cell, and
            # raise_for_status() keeps HTTP error pages from being saved as images.
            response = requests.get(src, timeout=30)
            response.raise_for_status()
            with open(os.path.join(folder, f"{file_stem}_{count}.jpg"), 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(f"Error downloading image {count}: {e}")
            continue

        print(f"Downloaded image {count + 1}")
        count += 1
        time.sleep(0.5)  # small pause between downloads

    print(f"Downloaded {count} images for query: {query}")

# Example


In [5]:
# Example: save up to 10 images for the query "ajanta caves" into the
# function's default output folder ('istock_images').
download_istock_images("ajanta caves", max_images=10)


Downloaded image 1
Downloaded image 2
Downloaded image 3
Downloaded image 4
Downloaded image 5
Downloaded image 6
Downloaded image 7
Downloaded image 8
Downloaded image 9
Downloaded image 10
Downloaded 10 images for query: ajanta caves


In [2]:
import os

# Path to your main images folder
# NOTE(review): this path ends in a single landmark folder ('Hawa mahal'),
# while the loop below expects landmark *subfolders* inside it — confirm the
# intended root before running.
main_folder = r'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\indian_monuments\Indian-monuments\images\test\Hawa mahal'

# File extensions treated as images (compared case-insensitively)
image_extensions = ('.jpg', '.jpeg', '.png')

# Loop through each subfolder (landmark folder)
for class_name in os.listdir(main_folder):
    class_path = os.path.join(main_folder, class_name)

    if not os.path.isdir(class_path):
        continue

    # Sort the listing so the numbering is deterministic instead of depending
    # on the order os.listdir happens to return.
    image_names = sorted(
        name for name in os.listdir(class_path)
        if name.lower().endswith(image_extensions)
    )

    # Pass 1: move every image to a temporary name. Renaming straight to the
    # final name can collide with a not-yet-processed file that already has
    # that name (e.g. when this cell is re-run), which overwrites it on POSIX
    # and raises FileExistsError on Windows.
    staged = []
    for count, filename in enumerate(image_names, start=1):
        # Get file extension
        ext = os.path.splitext(filename)[1]

        temp_path = os.path.join(class_path, f"__rename_tmp_{os.getpid()}_{count}{ext}")
        os.rename(os.path.join(class_path, filename), temp_path)

        # Create a new unique name for each image
        new_name = f"{class_name}_{str(count).zfill(4)}{ext}"
        staged.append((temp_path, os.path.join(class_path, new_name)))

    # Pass 2: all original names are gone now, so the final names are free.
    for temp_path, new_path in staged:
        os.rename(temp_path, new_path)

print("✅ All images renamed successfully!")


✅ All images renamed successfully!


In [2]:
import os

# Root directory of the training split; each subfolder is one class
train_dir = r'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\indian_monuments\Indian-monuments\images\train'

# Report how many entries each class folder contains
for class_folder in os.listdir(train_dir):
    folder_path = os.path.join(train_dir, class_folder)

    # Anything that is not a directory (stray files at the top level) is skipped
    if not os.path.isdir(folder_path):
        continue

    # Every directory entry is counted, whatever its type or extension
    num_images = sum(1 for _ in os.listdir(folder_path))
    print(f"Folder: {class_folder} has {num_images} images.")


Folder: Ajanta Caves has 296 images.
Folder: alai_darwaza has 258 images.
Folder: alai_minar has 300 images.
Folder: basilica_of_bom_jesus has 268 images.
Folder: charminar has 300 images.
Folder: Ellora Caves has 300 images.
Folder: Fatehpur Sikri has 242 images.
Folder: Gateway of India has 300 images.
Folder: golden temple has 300 images.
Folder: Hawa mahal has 300 images.
Folder: iron_pillar has 300 images.
Folder: jamali_kamali_tomb has 300 images.
Folder: Khajuraho has 244 images.
Folder: lotus_temple has 300 images.
Folder: mysore_palace has 293 images.
Folder: qutub_minar has 300 images.
Folder: Sun Temple Konark has 300 images.
Folder: tajmahal has 300 images.
Folder: tanjavur temple has 300 images.
Folder: victoria memorial has 300 images.


In [10]:
import os
import random
import shutil

# Path to the training data directory
train_dir = r'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\indian_monuments\Indian-monuments\images\train'

# Maximum number of files to keep in each class folder
max_images_per_class = 300

# Dedicated, seeded generator: the same files are chosen on a re-run over the
# same data, and the global `random` state is left untouched.
trim_rng = random.Random(42)

# Loop through each folder in the training directory
for class_folder in os.listdir(train_dir):
    folder_path = os.path.join(train_dir, class_folder)

    # Check if it's a directory (in case there are other files)
    if not os.path.isdir(folder_path):
        continue

    # Only regular files count toward the limit. Nested directories were
    # previously counted but never deleted, so the reported total could be wrong.
    # Sorting makes the seeded selection independent of listing order.
    images = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )

    surplus = len(images) - max_images_per_class
    if surplus <= 0:
        continue

    # Pick the files to delete at random. Taking images[300:] always removed
    # the tail of the listing order, which is not a random subset.
    extra_images = trim_rng.sample(images, surplus)

    for img in extra_images:
        os.remove(os.path.join(folder_path, img))  # Delete the image

    print(f"Deleted {len(extra_images)} images from {class_folder}, now it has {max_images_per_class} images.")


Deleted 16 images from alai_minar, now it has 300 images.
Deleted 113 images from charminar, now it has 300 images.
Deleted 10 images from Ellora Caves, now it has 300 images.
Deleted 130 images from Gateway of India, now it has 300 images.
Deleted 109 images from golden temple, now it has 300 images.
Deleted 101 images from hawa mahal pics, now it has 300 images.
Deleted 152 images from iron_pillar, now it has 300 images.
Deleted 62 images from qutub_minar, now it has 300 images.
Deleted 59 images from Sun Temple Konark, now it has 300 images.
Deleted 87 images from tajmahal, now it has 300 images.
Deleted 43 images from tanjavur temple, now it has 300 images.
Deleted 100 images from victoria memorial, now it has 300 images.


In [2]:
import os
import shutil
import random

def extract_top_images(source_dir, destination_dir, num_images=30, seed=None):
    """
    Copy a random sample of up to `num_images` images from each folder in
    `source_dir` into a folder of the same name under `destination_dir`.

    Despite the function name, images are not ranked: the selection is a
    random sample without replacement.

    Parameters:
    - source_dir: Directory containing folders of different places (Indian-monuments/train).
    - destination_dir: Directory where selected images will be saved (database folder).
    - num_images: Number of images to extract from each folder (default is 30).
      Folders with fewer images are copied in full.
    - seed: Optional seed for reproducible sampling. When None (the default),
      the module-level `random` generator is used.

    Raises:
    - ValueError: if `num_images` is negative.

    Only files ending in .jpg, .jpeg or .png (case-insensitive) are considered.
    One summary line is printed per place folder; nothing is returned.
    """
    if num_images < 0:
        raise ValueError(f"num_images must be non-negative, got {num_images}")

    # A private generator keeps seeded runs from disturbing global random state.
    rng = random if seed is None else random.Random(seed)

    # Ensure destination directory exists (exist_ok avoids a check-then-create race)
    os.makedirs(destination_dir, exist_ok=True)

    # Get all subdirectories in source_dir; sorted so a given seed always
    # yields the same selection regardless of directory listing order.
    place_folders = sorted(
        folder for folder in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, folder))
    )

    for place in place_folders:
        place_folder_path = os.path.join(source_dir, place)

        # Keep only regular files with an image extension; a directory named
        # like an image would otherwise make shutil.copy fail.
        images = sorted(
            f for f in os.listdir(place_folder_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
            and os.path.isfile(os.path.join(place_folder_path, f))
        )

        # Sample without replacement, capped at the number of available images
        top_images = rng.sample(images, min(num_images, len(images)))

        # Ensure place-specific folder in destination directory exists
        place_dest_folder = os.path.join(destination_dir, place)
        os.makedirs(place_dest_folder, exist_ok=True)

        # Copy selected images to the destination folder
        for image in top_images:
            shutil.copy(
                os.path.join(place_folder_path, image),
                os.path.join(place_dest_folder, image),
            )

        print(f"Copied {len(top_images)} images from '{place}' to '{place_dest_folder}'.")

# Define source directory (Indian-monuments/train) and destination directory (database folder)
# NOTE(review): this source path goes through "Tourist_landmarks\data\...", whereas
# the train_dir used in other cells goes through "Tourist_landmarks\indian_monuments\..."
# — confirm which location is the current one.
source_dir = r"C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\data\Indian-monuments\images\train"
destination_dir = r"C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database"

# Call the function with its default num_images (30 images per place folder)
extract_top_images(source_dir, destination_dir)


Copied 30 images from 'Ajanta Caves' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\Ajanta Caves'.
Copied 30 images from 'alai_darwaza' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\alai_darwaza'.
Copied 30 images from 'alai_minar' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\alai_minar'.
Copied 30 images from 'basilica_of_bom_jesus' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\basilica_of_bom_jesus'.
Copied 30 images from 'charminar' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\charminar'.
Copied 30 images from 'Ellora Caves' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\Ellora Caves'.
Copied 30 images from 'Fatehpur Sikri' to 'C:\Users\Shahnawaz\OneDrive\Desktop\Shanu College\Tourist_landmarks\database\Fatehpur Sikri'.
Copied 30 images from 'Gateway of India' to 'C:\Users\Sha