In [1]:
# Install required packages
!pip install tqdm
!apt-get update
!apt-get install -y wget
!pip install selenium
!apt-get install -y chromium-browser
!apt-get install -y chromium-chromedriver

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup  # For parsing HTML content
from urllib.parse import urljoin, urlparse  # For handling URLs
import urllib.request  # For making HTTP requests
import time  # For handling time-related operations
import os  # For interacting with the operating system (relate to dir, folder, file)
from tqdm import tqdm  # For displaying progress bars (visualize progress)
import concurrent.futures  # For multi-threading
import json  # For writing to a text file
from PIL import Image  # For handling images

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Ign:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy Release [5,713 B]
Hit:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy Release.gpg [793 B]
Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2,449 kB]
Get:14 https://ppa.launchpad

In [2]:
class UrlScraper:
    # Constructor
    def __init__(self, url_template, max_images=50, max_workers=4):
        self.url_template = url_template  # Link crawl
        self.max_images = max_images  # Max images
        self.max_workers = max_workers  # Thread
        self.setup_environment()  # Call for set up environment

    # Set up environment for selenium
    def setup_environment(self):
        os.environ['PATH'] += ':/usr/lib/chromium-browser/'
        os.environ['PATH'] += ':/usr/lib/chromium-browser/chromedriver/'

    def get_url_images(self, term):
        """
        Crawl the URLs of images by term

        Parameters:
        term (str): The name of the animal, plant, scenery, or furniture

        Returns:
        urls (list): List of URLs of images
        """

        # Initialize Chrome driver
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=options)

        url = self.url_template.format(search_term=term)
        driver.get(url)

        # Start crawling URLs of images
        urls = []
        more_content_available = True

        pbar = tqdm(total=self.max_images, desc=f"Fetching images for {term}")  # Set up for visualizing progress

        while len(urls) < self.max_images and more_content_available:
            soup = BeautifulSoup(driver.page_source, "html.parser")
            img_tags = soup.find_all("img")

            for img in img_tags:
                if len(urls) >= self.max_images:
                    break
                if 'src' in img.attrs:
                    href = img.attrs['src']
                    img_path = urljoin(url, href)
                    img_path = img_path.replace("_m.jpg", "_b.jpg").replace("_n.jpg", "_b.jpg").replace("_w.jpg", "_b.jpg")
                    if img_path == "https://combo.staticflickr.com/ap/build/images/getty/IStock_corporate_logo.svg":
                        continue
                    urls.append(img_path)
                    pbar.update(1)

            try:
                load_more_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[@id="yui_3_16_0_1_1721642285931_28620"]'))
                )
                load_more_button.click()
                time.sleep(2)
            except:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)

            new_soup = BeautifulSoup(driver.page_source, "html.parser")
            new_img_tags = new_soup.find_all("img", loading_="lazy")
            if len(new_img_tags) == len(img_tags):
                more_content_available = False
            img_tags = new_img_tags

        pbar.close()
        driver.quit()
        return urls

    def scrape_urls(self, categories):
        """
        Call the get_url_images method to retrieve all URLs of objects in categories

        Parameters:
        categories (dict): Dictionary of all objects to collect images with format
                           categories{"name_object": [value1, value2, ...]}

        Returns:
        all_urls (dict): Dictionary containing image URLs
        """
        all_urls = {category: {} for category in categories}

        # Use multithreading for efficiency
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_term = {executor.submit(self.get_url_images, term): (category, term)
                              for category, terms in categories.items() for term in terms}

            for future in tqdm(concurrent.futures.as_completed(future_to_term), total=len(future_to_term), desc="Overall Progress"):
                category, term = future_to_term[future]
                try:
                    urls = future.result()
                    all_urls[category][term] = urls
                    print(f"\nNumber of images retrieved for {term}: {len(urls)}")
                except Exception as exc:
                    print(f"\n{term} generated an exception: {exc}")
        return all_urls

    def save_to_file(self, data, filename):
        """
        Save data to a JSON file.

        Parameters:
        data (dict): Data to be saved.
        filename (str): Name of the JSON file.

        Returns:
        None
        """
        with open(filename, 'w') as file:
            json.dump(data, file, indent=4)
        print(f"Data saved to {filename}")


In [3]:
categories = {
    "animal": ["Monkey", "Elephant", "cows", "Cat", "Dog", "bear", "fox", "Civet", "Pangolins", "Rabbit", "Bats", "Whale", "Cock", "Owl", "flamingo", "Lizard", "Turtle", "Snake", "Frog", "Fish", "shrimp", "Crab", "Snail", "Coral", "Jellyfish", "Butterfly", "Flies", "Mosquito", "Ants", "Cockroaches", "Spider", "scorpion", "tiger", "bird", "horse", "pig", "Alligator", "Alpaca", "Anteater", "donkey", "Bee", "Buffalo", "Camel", "Caterpillar", "Cheetah", "Chicken", "Dragonfly", "Duck", "panda", "Giraffe"],
    "plant": ["Bamboo", "Apple", "Apricot", "Banana", "Bean", "Wildflower", "Flower", "Mushroom", "Weed", "Fern", "Reed", "Shrub", "Moss", "Grass", "Palmtree", "Corn", "Tulip", "Rose", "Clove", "Dogwood", "Durian", "Ferns", "Fig", "Flax", "Frangipani", "Lantana", "Hibiscus", "Bougainvillea", "Pea", "OrchidTree", "RangoonCreeper", "Jackfruit", "Cottonplant", "Corneliantree", "Coffeeplant", "Coconut", "wheat", "watermelon", "radish", "carrot"],
    "furniture": ["bed", "cabinet", "chair", "chests", "clock", "desks", "table", "Piano", "Bookcase", "Umbrella", "Clothes", "cart", "sofa", "ball", "spoon", "Bowl", "fridge", "pan", "book"],
    "scenery": ["Cliff", "Bay", "Coast", "Mountains", "Forests", "Waterbodies", "Lake", "desert", "farmland", "river", "hedges", "plain", "sky", "cave", "cloud", "flowergarden", "glacier", "grassland", "horizon", "lighthouse", "plateau", "savannah", "valley", "volcano", "waterfall"]
}

urltopic = {"flickr": "https://www.flickr.com/search/?text={search_term}"}
scraper = UrlScraper(url_template=urltopic["flickr"], max_images=2, max_workers=5)
image_urls = scraper.scrape_urls(categories)
scraper.save_to_file(image_urls, 'image_urls.json')

Overall Progress:   0%|          | 0/134 [00:00<?, ?it/s]
Fetching images for Monkey:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Monkey:  50%|█████     | 1/2 [00:03<00:03,  3.45s/it][A

Fetching images for Dog:   0%|          | 0/2 [00:00<?, ?it/s][A[A


Fetching images for cows:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A

Fetching images for Dog:  50%|█████     | 1/2 [00:00<00:00,  1.08it/s][A[A


Fetching images for Monkey: 100%|██████████| 2/2 [00:15<00:00,  7.88s/it]
Overall Progress:   1%|          | 1/134 [01:00<2:14:26, 60.65s/it]


Number of images retrieved for Monkey: 2



Fetching images for bear:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Dog: 100%|██████████| 2/2 [00:14<00:00,  7.12s/it]
Overall Progress:   1%|▏         | 2/134 [01:06<1:02:24, 28.37s/it]


Number of images retrieved for Dog: 2


Fetching images for cows: 100%|██████████| 2/2 [00:14<00:00,  7.28s/it]
Overall Progress:   2%|▏         | 3/134 [01:06<34:13, 15.67s/it]  


Number of images retrieved for cows: 2




Fetching images for fox:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for fox:  50%|█████     | 1/2 [00:01<00:00,  1.01it/s][A[A


Fetching images for bear: 100%|██████████| 2/2 [00:16<00:00,  8.18s/it]
Overall Progress:   3%|▎         | 4/134 [01:18<30:45, 14.20s/it]


Number of images retrieved for bear: 2





Fetching images for Civet:  50%|█████     | 1/2 [00:00<00:00,  1.40it/s][A[A[A
Fetching images for Pangolins:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for fox: 100%|██████████| 2/2 [00:14<00:00,  7.35s/it]
Overall Progress:   4%|▎         | 5/134 [01:28<27:16, 12.69s/it]


Number of images retrieved for fox: 2


Fetching images for Civet: 100%|██████████| 2/2 [00:13<00:00,  6.74s/it]
Overall Progress:   4%|▍         | 6/134 [01:32<20:08,  9.44s/it]


Number of images retrieved for Civet: 2




Fetching images for Bats:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Bats:  50%|█████     | 1/2 [00:00<00:00,  6.03it/s][A[A


Fetching images for Rabbit:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Pangolins: 100%|██████████| 2/2 [00:13<00:00,  6.59s/it]
Overall Progress:   5%|▌         | 7/134 [01:39<18:44,  8.86s/it]


Number of images retrieved for Pangolins: 2



Fetching images for Whale:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Bats: 100%|██████████| 2/2 [00:15<00:00,  7.74s/it]
Overall Progress:   6%|▌         | 8/134 [01:51<20:39,  9.83s/it]


Number of images retrieved for Bats: 2


Fetching images for Rabbit: 100%|██████████| 2/2 [00:15<00:00,  7.80s/it]
Overall Progress:   7%|▋         | 9/134 [01:52<14:31,  6.97s/it]


Number of images retrieved for Rabbit: 2


Fetching images for Whale: 100%|██████████| 2/2 [00:15<00:00,  7.94s/it]
Overall Progress:   7%|▋         | 10/134 [02:01<15:39,  7.58s/it]


Number of images retrieved for Whale: 2



Fetching images for Cock:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Cock:  50%|█████     | 1/2 [00:01<00:01,  1.02s/it][A

Fetching images for Owl:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Owl:  50%|█████     | 1/2 [00:00<00:00,  1.55it/s][A[A


Fetching images for flamingo:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Cock: 100%|██████████| 2/2 [00:13<00:00,  6.98s/it]
Overall Progress:   8%|▊         | 11/134 [02:18<21:46, 10.62s/it]


Number of images retrieved for Cock: 2


Fetching images for Owl: 100%|██████████| 2/2 [00:13<00:00,  6.96s/it]
Overall Progress:   9%|▉         | 12/134 [02:19<15:40,  7.71s/it]


Number of images retrieved for Owl: 2


Fetching images for flamingo: 100%|██████████| 2/2 [00:15<00:00,  7.97s/it]
Overall Progress:  10%|▉         | 13/134 [02:24<13:32,  6.72s/it]


Number of images retrieved for flamingo: 2



Fetching images for Lizard:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Lizard:  50%|█████     | 1/2 [00:01<00:00,  1.00it/s][A

Fetching images for Turtle:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Turtle:  50%|█████     | 1/2 [00:00<00:00,  1.33it/s][A[A


Fetching images for Snake:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Lizard: 100%|██████████| 2/2 [00:13<00:00,  6.68s/it]
Overall Progress:  10%|█         | 14/134 [02:44<21:28, 10.73s/it]


Number of images retrieved for Lizard: 2


Fetching images for Turtle: 100%|██████████| 2/2 [00:13<00:00,  6.82s/it]

Overall Progress:  11%|█         | 15/134 [02:48<17:40,  8.91s/it]


Number of images retrieved for Turtle: 2



Fetching images for Snake: 100%|██████████| 2/2 [00:14<00:00,  7.38s/it]
Overall Progress:  12%|█▏        | 16/134 [02:53<14:59,  7.63s/it]


Number of images retrieved for Snake: 2




Fetching images for Fish:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Fish:  50%|█████     | 1/2 [00:00<00:00,  1.56it/s][A[A


Fetching images for Cat:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Cat:  50%|█████     | 1/2 [00:01<00:01,  1.31s/it][A[A[A



Fetching images for shrimp:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Frog: 100%|██████████| 2/2 [00:16<00:00,  8.30s/it]
Overall Progress:  13%|█▎        | 17/134 [03:05<17:27,  8.95s/it]


Number of images retrieved for Frog: 2


Fetching images for Fish: 100%|██████████| 2/2 [00:14<00:00,  7.36s/it]
Overall Progress:  13%|█▎        | 18/134 [03:13<16:22,  8.47s/it]


Number of images retrieved for Fish: 2



Fetching images for Crab:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Cat: 100%|██████████| 2/2 [00:14<00:00,  7.07s/it]
Overall Progress:  14%|█▍        | 19/134 [03:16<13:14,  6.91s/it]


Number of images retrieved for Cat: 2


Fetching images for shrimp: 100%|██████████| 2/2 [00:14<00:00,  7.10s/it]
Overall Progress:  15%|█▍        | 20/134 [03:17<10:03,  5.30s/it]


Number of images retrieved for shrimp: 2




Fetching images for Snail:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Snail:  50%|█████     | 1/2 [00:01<00:01,  1.09s/it][A[A


Fetching images for Elephant:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for Coral:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A


Fetching images for Elephant:  50%|█████     | 1/2 [00:02<00:02,  2.90s/it][A[A[A



Fetching images for Crab: 100%|██████████| 2/2 [00:17<00:00,  8.62s/it]

Fetching images for Jellyfish:   0%|          | 0/2 [00:00<?, ?it/s][A
Overall Progress:  16%|█▌        | 21/134 [03:32<15:09,  8.05s/it]


Number of images retrieved for Crab: 2


Fetching images for Snail: 100%|██████████| 2/2 [00:15<00:00,  7.58s/it]
Overall Progress:  16%|█▋        | 22/134 [03:35<12:18,  6.59s/it]


Number of images retrieved for Snail: 2




Fetching images for Butterfly:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Butterfly:  50%|█████     | 1/2 [00:00<00:00,  3.04it/s][A[A




Fetching images for Flies:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Elephant: 100%|██████████| 2/2 [00:18<00:00,  9.02s/it]
Overall Progress:  17%|█▋        | 23/134 [03:44<13:48,  7.46s/it]


Number of images retrieved for Elephant: 2


Fetching images for Coral: 100%|██████████| 2/2 [00:17<00:00,  8.64s/it]
Overall Progress:  18%|█▊        | 24/134 [03:46<10:14,  5.59s/it]


Number of images retrieved for Coral: 2


Fetching images for Jellyfish: 100%|██████████| 2/2 [00:15<00:00,  7.59s/it]
Overall Progress:  19%|█▊        | 25/134 [03:46<07:21,  4.05s/it]


Number of images retrieved for Jellyfish: 2



Fetching images for Butterfly: 100%|██████████| 2/2 [00:18<00:00,  9.33s/it]
Overall Progress:  19%|█▉        | 26/134 [03:59<12:10,  6.76s/it]


Number of images retrieved for Butterfly: 2




Fetching images for Mosquito:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Flies: 100%|██████████| 2/2 [00:20<00:00, 10.43s/it]
Overall Progress:  20%|██        | 27/134 [04:03<10:11,  5.72s/it]


Number of images retrieved for Flies: 2





Fetching images for Spider:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Spider:  50%|█████     | 1/2 [00:00<00:00,  1.11it/s][A[A[A



Fetching images for scorpion:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Mosquito: 100%|██████████| 2/2 [00:15<00:00,  7.67s/it]
Overall Progress:  21%|██        | 28/134 [04:15<13:35,  7.69s/it]


Number of images retrieved for Mosquito: 2




Fetching images for tiger:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Spider: 100%|██████████| 2/2 [00:14<00:00,  7.20s/it]
Overall Progress:  22%|██▏       | 29/134 [04:25<14:56,  8.54s/it]


Number of images retrieved for Spider: 2


Fetching images for scorpion: 100%|██████████| 2/2 [00:14<00:00,  7.01s/it]
Overall Progress:  22%|██▏       | 30/134 [04:27<11:02,  6.37s/it]


Number of images retrieved for scorpion: 2


Fetching images for tiger: 100%|██████████| 2/2 [00:15<00:00,  7.91s/it]
Overall Progress:  23%|██▎       | 31/134 [04:34<11:36,  6.76s/it]


Number of images retrieved for tiger: 2




Fetching images for bird:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for bird:  50%|█████     | 1/2 [00:01<00:01,  1.26s/it][A[A


Fetching images for horse:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for horse:  50%|█████     | 1/2 [00:01<00:01,  1.04s/it][A[A[A



Fetching images for pig:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for bird: 100%|██████████| 2/2 [00:14<00:00,  7.08s/it]
Overall Progress:  24%|██▍       | 32/134 [04:53<17:49, 10.49s/it]


Number of images retrieved for bird: 2


Fetching images for horse: 100%|██████████| 2/2 [00:13<00:00,  6.87s/it]
Overall Progress:  25%|██▍       | 33/134 [04:54<12:48,  7.61s/it]


Number of images retrieved for horse: 2


Fetching images for pig: 100%|██████████| 2/2 [00:16<00:00,  8.24s/it]
Overall Progress:  25%|██▌       | 34/134 [05:03<12:57,  7.78s/it]


Number of images retrieved for pig: 2




Fetching images for Alligator:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Alligator:  50%|█████     | 1/2 [00:01<00:01,  1.11s/it][A[A


Fetching images for Alpaca:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Alpaca:  50%|█████     | 1/2 [00:00<00:00,  1.05it/s][A[A[A



Fetching images for Anteater:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Alligator: 100%|██████████| 2/2 [00:13<00:00,  6.91s/it]
Overall Progress:  26%|██▌       | 35/134 [05:19<17:16, 10.47s/it]


Number of images retrieved for Alligator: 2


Fetching images for Alpaca: 100%|██████████| 2/2 [00:13<00:00,  6.69s/it]
Overall Progress:  27%|██▋       | 36/134 [05:21<12:43,  7.79s/it]


Number of images retrieved for Alpaca: 2




Fetching images for donkey:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Anteater: 100%|██████████| 2/2 [00:13<00:00,  6.83s/it]
Overall Progress:  28%|██▊       | 37/134 [05:27<11:48,  7.30s/it]


Number of images retrieved for Anteater: 2





Fetching images for Bee:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Bee:  50%|█████     | 1/2 [00:00<00:00,  1.88it/s][A[A[A



Fetching images for Buffalo:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for donkey: 100%|██████████| 2/2 [00:14<00:00,  7.10s/it]
Overall Progress:  28%|██▊       | 38/134 [05:40<14:10,  8.86s/it]


Number of images retrieved for donkey: 2


Fetching images for Bee: 100%|██████████| 2/2 [00:13<00:00,  6.69s/it]
Overall Progress:  29%|██▉       | 39/134 [05:41<10:34,  6.68s/it]


Number of images retrieved for Bee: 2


Fetching images for Buffalo: 100%|██████████| 2/2 [00:14<00:00,  7.27s/it]
Overall Progress:  30%|██▉       | 40/134 [05:49<11:14,  7.17s/it]


Number of images retrieved for Buffalo: 2




Fetching images for Camel:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Camel:  50%|█████     | 1/2 [00:00<00:00,  1.29it/s][A[A


Fetching images for Caterpillar:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Caterpillar:  50%|█████     | 1/2 [00:00<00:00,  2.28it/s][A[A[A



Fetching images for Cheetah:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Camel: 100%|██████████| 2/2 [00:13<00:00,  6.97s/it]
Overall Progress:  31%|███       | 41/134 [06:04<14:30,  9.36s/it]


Number of images retrieved for Camel: 2


Fetching images for Caterpillar: 100%|██████████| 2/2 [00:13<00:00,  6.94s/it]
Overall Progress:  31%|███▏      | 42/134 [06:05<10:35,  6.91s/it]


Number of images retrieved for Caterpillar: 2


Fetching images for Cheetah: 100%|██████████| 2/2 [00:16<00:00,  8.07s/it]
Overall Progress:  32%|███▏      | 43/134 [06:10<09:41,  6.39s/it]


Number of images retrieved for Cheetah: 2




Fetching images for Ants:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Ants:  50%|█████     | 1/2 [00:02<00:02,  2.98s/it][A[A


Fetching images for Chicken:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for Dragonfly:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Dragonfly:  50%|█████     | 1/2 [00:01<00:00,  1.32it/s][A[A[A[A



Fetching images for Dragonfly: 100%|██████████| 2/2 [00:01<00:00,  2.08it/s][A[A[A[A


Fetching images for Chicken:  50%|█████     | 1/2 [00:01<00:01,  1.74s/it][A[A[A




Fetching images for Duck:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Cockroaches:   0%|          | 0/2 [02:38<?, ?it/s]
Overall Progress:  33%|███▎      | 44/134 [06:27<14:01,  9.35s/it]


Number of images retrieved for Cockroaches: 0


Fetching images for Ants: 100%|██████████| 2/2 [00:15<00:00,  7.97s/it]
Overall Progress:  34%|███▎      | 45/134 [06:30<11:17,  7.62s/it]


Number of images retrieved for Ants: 2



Fetching images for panda:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Dragonfly: 100%|██████████| 2/2 [00:16<00:00,  8.21s/it]
Overall Progress:  34%|███▍      | 46/134 [06:37<11:04,  7.55s/it]


Number of images retrieved for Dragonfly: 2


Fetching images for Chicken: 100%|██████████| 2/2 [00:17<00:00,  8.76s/it]
Overall Progress:  35%|███▌      | 47/134 [06:38<07:52,  5.43s/it]


Number of images retrieved for Chicken: 2


Fetching images for Duck: 100%|██████████| 2/2 [00:14<00:00,  7.21s/it]
Overall Progress:  36%|███▌      | 48/134 [06:39<05:45,  4.02s/it]


Number of images retrieved for Duck: 2




Fetching images for Giraffe:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for panda: 100%|██████████| 2/2 [00:18<00:00,  9.36s/it]
Overall Progress:  37%|███▋      | 49/134 [06:50<08:45,  6.18s/it]


Number of images retrieved for panda: 2



Fetching images for Apple:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Apple:  50%|█████     | 1/2 [00:00<00:00,  1.58it/s][A


Fetching images for Banana:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Giraffe: 100%|██████████| 2/2 [00:15<00:00,  7.65s/it]
Overall Progress:  37%|███▋      | 50/134 [06:58<09:20,  6.67s/it]


Number of images retrieved for Giraffe: 2




Fetching images for Bamboo:   0%|          | 0/2 [00:00<?, ?it/s][A[A



Fetching images for Apricot:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A

Fetching images for Bamboo:  50%|█████     | 1/2 [00:01<00:01,  1.07s/it][A[A



Fetching images for Apricot:  50%|█████     | 1/2 [00:01<00:01,  1.16s/it][A[A[A[A




Fetching images for Bean:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Apple: 100%|██████████| 2/2 [00:13<00:00,  6.88s/it]
Overall Progress:  38%|███▊      | 51/134 [07:07<10:21,  7.48s/it]


Number of images retrieved for Apple: 2


Fetching images for Banana: 100%|██████████| 2/2 [00:14<00:00,  7.12s/it]
Overall Progress:  39%|███▉      | 52/134 [07:11<08:39,  6.34s/it]


Number of images retrieved for Banana: 2



Fetching images for Flower:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Flower:  50%|█████     | 1/2 [00:01<00:01,  1.65s/it][A


Fetching images for Bamboo: 100%|██████████| 2/2 [00:18<00:00,  9.27s/it]



Fetching images for Wildflower:  50%|█████     | 1/2 [00:01<00:01,  1.91s/it][A[A[A


Overall Progress:  40%|███▉      | 53/134 [07:20<09:45,  7.23s/it]


Number of images retrieved for Bamboo: 2


Fetching images for Apricot: 100%|██████████| 2/2 [00:19<00:00,  9.76s/it]
Overall Progress:  40%|████      | 54/134 [07:21<07:09,  5.37s/it]


Number of images retrieved for Apricot: 2


Fetching images for Bean: 100%|██████████| 2/2 [00:18<00:00,  9.06s/it]
Overall Progress:  41%|████      | 55/134 [07:22<05:14,  3.98s/it]


Number of images retrieved for Bean: 2


Fetching images for Flower: 100%|██████████| 2/2 [00:19<00:00,  9.52s/it]

Overall Progress:  42%|████▏     | 56/134 [07:35<08:34,  6.59s/it]


Number of images retrieved for Flower: 2




Fetching images for Wildflower: 100%|██████████| 2/2 [00:17<00:00,  8.95s/it]

Fetching images for Mushroom:  50%|█████     | 1/2 [00:01<00:01,  1.68s/it][A


Overall Progress:  43%|████▎     | 57/134 [07:36<06:33,  5.11s/it]


Number of images retrieved for Wildflower: 2




Fetching images for Weed:  50%|█████     | 1/2 [00:01<00:01,  1.11s/it][A[A


Fetching images for Fern:  50%|█████     | 1/2 [00:01<00:01,  1.04s/it][A[A[A



Fetching images for Reed:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Reed:  50%|█████     | 1/2 [00:01<00:01,  1.15s/it][A[A[A[A




Fetching images for Shrub:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Mushroom: 100%|██████████| 2/2 [00:15<00:00,  7.99s/it]
Overall Progress:  43%|████▎     | 58/134 [07:51<10:03,  7.95s/it]


Number of images retrieved for Mushroom: 2


Fetching images for Weed: 100%|██████████| 2/2 [00:15<00:00,  7.98s/it]
Fetching images for Fern: 100%|██████████| 2/2 [00:15<00:00,  7.88s/it]
Overall Progress:  45%|████▍     | 60/134 [07:52<05:10,  4.20s/it]


Number of images retrieved for Weed: 2

Number of images retrieved for Fern: 2


Fetching images for Reed: 100%|██████████| 2/2 [00:16<00:00,  8.46s/it]
Overall Progress:  46%|████▌     | 61/134 [08:02<07:02,  5.79s/it]


Number of images retrieved for Reed: 2


Fetching images for Shrub: 100%|██████████| 2/2 [00:17<00:00,  8.84s/it]
Overall Progress:  46%|████▋     | 62/134 [08:05<06:03,  5.04s/it]


Number of images retrieved for Shrub: 2



Fetching images for Moss:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Moss:  50%|█████     | 1/2 [00:02<00:02,  2.51s/it][A

Fetching images for Grass:   0%|          | 0/2 [00:00<?, ?it/s][A[A


Fetching images for Palmtree:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Palmtree:  50%|█████     | 1/2 [00:00<00:00,  1.29it/s][A[A[A

Fetching images for Grass:  50%|█████     | 1/2 [00:00<00:00,  1.15it/s][A[A



Fetching images for Tulip:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Tulip:  50%|█████     | 1/2 [00:00<00:00,  3.35it/s][A[A[A[A




Fetching images for Corn:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Moss: 100%|██████████| 2/2 [00:16<00:00,  8.07s/it]
Overall Progress:  47%|████▋     | 63/134 [08:22<10:04,  8.51s/it]


Number of images retrieved for Moss: 2


Fetching images for Grass: 100%|██████████| 2/2 [00:15<00:00,  7.65s/it]
Fetching images for Palmtree: 100%|██████████| 2/2 [00:15<00:00,  7.76s/it]
Overall Progress:  48%|████▊     | 64/134 [08:24<07:51,  6.74s/it]


Number of images retrieved for Grass: 2


Overall Progress:  49%|████▊     | 65/134 [08:24<05:31,  4.80s/it]


Number of images retrieved for Palmtree: 2


Fetching images for Tulip: 100%|██████████| 2/2 [00:14<00:00,  7.40s/it]
Overall Progress:  49%|████▉     | 66/134 [08:27<04:30,  3.98s/it]


Number of images retrieved for Tulip: 2


Fetching images for Corn: 100%|██████████| 2/2 [00:16<00:00,  8.05s/it]
Overall Progress:  50%|█████     | 67/134 [08:30<04:25,  3.96s/it]


Number of images retrieved for Corn: 2



Fetching images for Rose:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Rose:  50%|█████     | 1/2 [00:01<00:01,  1.21s/it][A

Fetching images for Clove:   0%|          | 0/2 [00:00<?, ?it/s][A[A


Fetching images for Dogwood:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for Durian:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A

Fetching images for Clove:  50%|█████     | 1/2 [00:02<00:02,  2.34s/it][A[A



Fetching images for Durian:  50%|█████     | 1/2 [00:00<00:00,  1.16it/s][A[A[A[A


Fetching images for Dogwood:  50%|█████     | 1/2 [00:01<00:01,  1.69s/it][A[A[A




Fetching images for Ferns:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Rose: 100%|██████████| 2/2 [00:13<00:00,  6.88s/it]
Overall Progress:  51%|█████     | 68/134 [08:52<10:14,  9.31s/it]


Number of images retrieved for Rose: 2


Fetching images for Clove: 100%|██████████| 2/2 [00:18<00:00,  9.09s/it]
Overall Progress:  51%|█████▏    | 69/134 [08:59<09:17,  8.58s/it]


Number of images retrieved for Clove: 2


Fetching images for Durian: 100%|██████████| 2/2 [00:16<00:00,  8.49s/it]
Overall Progress:  52%|█████▏    | 70/134 [09:00<06:39,  6.24s/it]


Number of images retrieved for Durian: 2


Fetching images for Dogwood: 100%|██████████| 2/2 [00:17<00:00,  8.89s/it]
Overall Progress:  53%|█████▎    | 71/134 [09:01<04:52,  4.64s/it]


Number of images retrieved for Dogwood: 2


Fetching images for Ferns: 100%|██████████| 2/2 [00:16<00:00,  8.28s/it]
Overall Progress:  54%|█████▎    | 72/134 [09:03<03:55,  3.80s/it]


Number of images retrieved for Ferns: 2



Fetching images for Fig:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Fig:  50%|█████     | 1/2 [00:01<00:01,  1.28s/it][A

Fetching images for Flax:   0%|          | 0/2 [00:00<?, ?it/s][A[A



Fetching images for Frangipani:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A


Fetching images for Lantana:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A

Fetching images for Flax:  50%|█████     | 1/2 [00:04<00:04,  4.39s/it][A[A




Fetching images for Hibiscus:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A


Fetching images for Fig: 100%|██████████| 2/2 [00:18<00:00,  9.29s/it]
Overall Progress:  54%|█████▍    | 73/134 [09:22<08:38,  8.50s/it]



Fetching images for Frangipani:  50%|█████     | 1/2 [00:04<00:04,  4.76s/it][A[A[A[A


Number of images retrieved for Fig: 2







Fetching images for Hibiscus:  50%|█████     | 1/2 [00:02<00:02,  2.31s/it][A[A[A[A[A
Fetching images for Bougainvillea:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Flax: 100%|██████████| 2/2 [00:17<00:00,  8.54s/it]
Overall Progress:  55%|█████▌    | 74/134 [09:31<08:38,  8.65s/it]


Number of images retrieved for Flax: 2


Fetching images for Lantana: 100%|██████████| 2/2 [00:16<00:00,  8.25s/it]
Overall Progress:  56%|█████▌    | 75/134 [09:34<06:52,  7.00s/it]


Number of images retrieved for Lantana: 2


Fetching images for Frangipani: 100%|██████████| 2/2 [00:18<00:00,  9.18s/it]
Overall Progress:  57%|█████▋    | 76/134 [09:36<05:19,  5.50s/it]


Number of images retrieved for Frangipani: 2


Fetching images for Hibiscus: 100%|██████████| 2/2 [00:16<00:00,  8.09s/it]
Overall Progress:  57%|█████▋    | 77/134 [09:37<03:56,  4.15s/it]


Number of images retrieved for Hibiscus: 2




Fetching images for Pea:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Bougainvillea: 100%|██████████| 2/2 [00:16<00:00,  8.22s/it]
Overall Progress:  58%|█████▊    | 78/134 [09:41<03:44,  4.01s/it]


Number of images retrieved for Bougainvillea: 2



Fetching images for OrchidTree:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for OrchidTree:  50%|█████     | 1/2 [00:03<00:03,  3.29s/it][A


Fetching images for Jackfruit:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for RangoonCreeper:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A


Fetching images for Jackfruit:  50%|█████     | 1/2 [00:01<00:01,  1.20s/it][A[A[A



Fetching images for Pea: 100%|██████████| 2/2 [00:16<00:00,  8.20s/it]
Overall Progress:  59%|█████▉    | 79/134 [09:55<06:22,  6.95s/it]


Number of images retrieved for Pea: 2




Fetching images for Corneliantree:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for OrchidTree: 100%|██████████| 2/2 [00:18<00:00,  9.02s/it]
Overall Progress:  60%|█████▉    | 80/134 [10:05<07:15,  8.07s/it]


Number of images retrieved for OrchidTree: 2


Fetching images for Jackfruit: 100%|██████████| 2/2 [00:16<00:00,  8.02s/it]

Overall Progress:  60%|██████    | 81/134 [10:07<05:31,  6.26s/it]


Number of images retrieved for Jackfruit: 2



Fetching images for RangoonCreeper: 100%|██████████| 2/2 [00:15<00:00,  7.94s/it]
Overall Progress:  61%|██████    | 82/134 [10:08<04:03,  4.67s/it]


Number of images retrieved for RangoonCreeper: 2


Fetching images for Corneliantree: 100%|██████████| 2/2 [00:14<00:00,  7.09s/it]
Overall Progress:  62%|██████▏   | 83/134 [10:13<04:04,  4.79s/it]


Number of images retrieved for Corneliantree: 2




Fetching images for Coffeeplant:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Coffeeplant:  50%|█████     | 1/2 [00:01<00:01,  1.10s/it][A[A


Fetching images for wheat:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for Coconut:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A


Fetching images for wheat:  50%|█████     | 1/2 [00:01<00:01,  1.40s/it][A[A[A



Fetching images for Cottonplant: 100%|██████████| 2/2 [00:16<00:00,  8.25s/it]
Overall Progress:  63%|██████▎   | 84/134 [10:24<05:24,  6.50s/it]


Number of images retrieved for Cottonplant: 2



Fetching images for watermelon:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Coffeeplant: 100%|██████████| 2/2 [00:16<00:00,  8.32s/it]
Overall Progress:  63%|██████▎   | 85/134 [10:31<05:19,  6.52s/it]


Number of images retrieved for Coffeeplant: 2


Fetching images for wheat: 100%|██████████| 2/2 [00:15<00:00,  7.82s/it]
Overall Progress:  64%|██████▍   | 86/134 [10:35<04:37,  5.79s/it]


Number of images retrieved for wheat: 2


Fetching images for Coconut: 100%|██████████| 2/2 [00:16<00:00,  8.37s/it]
Overall Progress:  65%|██████▍   | 87/134 [10:36<03:34,  4.57s/it]


Number of images retrieved for Coconut: 2




Fetching images for radish:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for watermelon: 100%|██████████| 2/2 [00:15<00:00,  7.91s/it]
Overall Progress:  66%|██████▌   | 88/134 [10:43<03:58,  5.19s/it]


Number of images retrieved for watermelon: 2



Fetching images for bed:   0%|          | 0/2 [00:00<?, ?it/s][A


Fetching images for cabinet:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A
Fetching images for bed:  50%|█████     | 1/2 [00:01<00:01,  1.15s/it][A


Fetching images for cabinet:  50%|█████     | 1/2 [00:00<00:00,  1.99it/s][A[A[A



Fetching images for chair:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for radish: 100%|██████████| 2/2 [00:13<00:00,  6.70s/it]
Overall Progress:  66%|██████▋   | 89/134 [10:50<04:17,  5.73s/it]


Number of images retrieved for radish: 2




Fetching images for carrot:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for carrot:  50%|█████     | 1/2 [00:00<00:00,  1.20it/s][A[A




Fetching images for chests:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for bed: 100%|██████████| 2/2 [00:16<00:00,  8.35s/it]
Overall Progress:  67%|██████▋   | 90/134 [11:02<05:39,  7.71s/it]


Number of images retrieved for bed: 2


Fetching images for cabinet: 100%|██████████| 2/2 [00:16<00:00,  8.10s/it]
Overall Progress:  68%|██████▊   | 91/134 [11:03<03:59,  5.57s/it]


Number of images retrieved for cabinet: 2


Fetching images for chair: 100%|██████████| 2/2 [00:16<00:00,  8.20s/it]
Overall Progress:  69%|██████▊   | 92/134 [11:06<03:19,  4.75s/it]


Number of images retrieved for chair: 2


Fetching images for carrot: 100%|██████████| 2/2 [00:15<00:00,  7.80s/it]
Overall Progress:  69%|██████▉   | 93/134 [11:11<03:16,  4.79s/it]


Number of images retrieved for carrot: 2


Fetching images for chests: 100%|██████████| 2/2 [00:18<00:00,  9.20s/it]
Overall Progress:  70%|███████   | 94/134 [11:17<03:35,  5.39s/it]


Number of images retrieved for chests: 2



Fetching images for clock:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for clock:  50%|█████     | 1/2 [00:01<00:01,  1.40s/it][A

Fetching images for table:   0%|          | 0/2 [00:00<?, ?it/s][A[A


Fetching images for desks:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A

Fetching images for table:  50%|█████     | 1/2 [00:00<00:00,  1.18it/s][A[A


Fetching images for desks:  50%|█████     | 1/2 [00:01<00:01,  1.04s/it][A[A[A



Fetching images for Bookcase:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for clock: 100%|██████████| 2/2 [00:14<00:00,  7.13s/it]
Overall Progress:  71%|███████   | 95/134 [11:33<05:30,  8.47s/it]


Number of images retrieved for clock: 2


Fetching images for table: 100%|██████████| 2/2 [00:17<00:00,  8.85s/it]
Fetching images for desks: 100%|██████████| 2/2 [00:17<00:00,  8.96s/it]


Number of images retrieved for table: 2



Overall Progress:  72%|███████▏  | 97/134 [11:39<03:21,  5.45s/it]


Number of images retrieved for desks: 2


Fetching images for Bookcase: 100%|██████████| 2/2 [00:18<00:00,  9.03s/it]
Overall Progress:  73%|███████▎  | 98/134 [11:42<02:52,  4.80s/it]


Number of images retrieved for Bookcase: 2



Fetching images for Piano:   0%|          | 0/2 [00:00<?, ?it/s][A

Fetching images for Umbrella:   0%|          | 0/2 [00:00<?, ?it/s][A[A
Fetching images for Piano:  50%|█████     | 1/2 [00:00<00:00,  1.28it/s][A

Fetching images for Piano: 100%|██████████| 2/2 [00:15<00:00,  7.57s/it]
Fetching images for Umbrella: 100%|██████████| 2/2 [00:14<00:00,  7.47s/it]


Number of images retrieved for Piano: 2



Overall Progress:  75%|███████▍  | 100/134 [12:06<04:07,  7.28s/it]


Number of images retrieved for Umbrella: 2



Fetching images for Clothes:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Clothes:  50%|█████     | 1/2 [00:01<00:01,  1.87s/it][A

Fetching images for cart:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for cart:  50%|█████     | 1/2 [00:02<00:02,  2.47s/it][A[A


Fetching images for sofa:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for sofa:  50%|█████     | 1/2 [00:01<00:01,  1.98s/it][A[A[A



Fetching images for ball:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for Clothes: 100%|██████████| 2/2 [00:14<00:00,  7.20s/it]
Overall Progress:  75%|███████▌  | 101/134 [12:23<05:37, 10.23s/it]


Number of images retrieved for Clothes: 2



Fetching images for Bowl:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for cart: 100%|██████████| 2/2 [00:15<00:00,  7.97s/it]
Overall Progress:  76%|███████▌  | 102/134 [12:32<05:21, 10.04s/it]


Number of images retrieved for cart: 2




Fetching images for sofa: 100%|██████████| 2/2 [00:17<00:00,  8.59s/it]
Overall Progress:  77%|███████▋  | 103/134 [12:37<04:18,  8.35s/it]


Number of images retrieved for sofa: 2




Fetching images for ball: 100%|██████████| 2/2 [00:16<00:00,  8.30s/it]
Overall Progress:  78%|███████▊  | 104/134 [12:38<03:08,  6.27s/it]


Number of images retrieved for ball: 2


Fetching images for Bowl: 100%|██████████| 2/2 [00:16<00:00,  8.44s/it]
Overall Progress:  78%|███████▊  | 105/134 [12:43<02:48,  5.82s/it]


Number of images retrieved for Bowl: 2



Fetching images for pan:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for pan:  50%|█████     | 1/2 [00:00<00:00,  1.17it/s][A


Fetching images for spoon: 100%|██████████| 2/2 [00:18<00:00,  9.15s/it]
Overall Progress:  79%|███████▉  | 106/134 [12:54<03:27,  7.41s/it]


Number of images retrieved for spoon: 2





Fetching images for book:  50%|█████     | 1/2 [00:01<00:01,  1.28s/it][A[A[A

Fetching images for Cliff:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for pan: 100%|██████████| 2/2 [00:14<00:00,  7.11s/it]
Overall Progress:  80%|███████▉  | 107/134 [13:00<03:04,  6.83s/it]


Number of images retrieved for pan: 2



Fetching images for Bay:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for Bay:  50%|█████     | 1/2 [00:00<00:00,  1.19it/s][A



Fetching images for fridge:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for book: 100%|██████████| 2/2 [00:16<00:00,  8.11s/it]
Overall Progress:  81%|████████  | 108/134 [13:10<03:25,  7.89s/it]


Number of images retrieved for book: 2


Fetching images for Cliff: 100%|██████████| 2/2 [00:15<00:00,  7.77s/it]
Overall Progress:  81%|████████▏ | 109/134 [13:13<02:40,  6.40s/it]


Number of images retrieved for Cliff: 2




Fetching images for Coast:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Bay: 100%|██████████| 2/2 [00:15<00:00,  7.91s/it]
Overall Progress:  82%|████████▏ | 110/134 [13:18<02:24,  6.00s/it]


Number of images retrieved for Bay: 2



Fetching images for fridge: 100%|██████████| 2/2 [00:15<00:00,  7.71s/it]
Overall Progress:  83%|████████▎ | 111/134 [13:23<02:13,  5.81s/it]


Number of images retrieved for fridge: 2



Fetching images for Mountains:  50%|█████     | 1/2 [00:01<00:01,  1.13s/it][A


Fetching images for Forests:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for Coast: 100%|██████████| 2/2 [00:17<00:00,  8.73s/it]
Overall Progress:  84%|████████▎ | 112/134 [13:31<02:18,  6.29s/it]


Number of images retrieved for Coast: 2




Fetching images for Waterbodies:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Waterbodies:  50%|█████     | 1/2 [00:01<00:01,  1.07s/it][A[A



Fetching images for Mountains: 100%|██████████| 2/2 [00:15<00:00,  7.88s/it]
Overall Progress:  84%|████████▍ | 113/134 [13:39<02:24,  6.88s/it]


Number of images retrieved for Mountains: 2






Fetching images for Lake:  50%|█████     | 1/2 [00:01<00:01,  1.26s/it][A[A[A[A
Fetching images for Forests: 100%|██████████| 2/2 [00:19<00:00,  9.65s/it]

Overall Progress:  85%|████████▌ | 114/134 [13:46<02:18,  6.91s/it]


Number of images retrieved for Forests: 2


Fetching images for Waterbodies: 100%|██████████| 2/2 [00:16<00:00,  8.22s/it]
Overall Progress:  86%|████████▌ | 115/134 [13:48<01:45,  5.53s/it]


Number of images retrieved for Waterbodies: 2




Fetching images for farmland:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for Lake: 100%|██████████| 2/2 [00:17<00:00,  8.53s/it]
Overall Progress:  87%|████████▋ | 116/134 [13:55<01:47,  5.98s/it]


Number of images retrieved for Lake: 2





Fetching images for river:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for river:  50%|█████     | 1/2 [00:01<00:01,  1.68s/it][A[A[A



Fetching images for desert: 100%|██████████| 2/2 [00:16<00:00,  8.44s/it]
Overall Progress:  87%|████████▋ | 117/134 [14:01<01:41,  5.97s/it]


Number of images retrieved for desert: 2






Fetching images for hedges:  50%|█████     | 1/2 [00:01<00:01,  1.35s/it][A[A[A[A
Fetching images for plain:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for farmland: 100%|██████████| 2/2 [00:17<00:00,  8.65s/it]
Overall Progress:  88%|████████▊ | 118/134 [14:11<01:52,  7.01s/it]


Number of images retrieved for farmland: 2


Fetching images for river: 100%|██████████| 2/2 [00:16<00:00,  8.38s/it]
Overall Progress:  89%|████████▉ | 119/134 [14:16<01:35,  6.36s/it]


Number of images retrieved for river: 2




Fetching images for hedges: 100%|██████████| 2/2 [00:17<00:00,  8.50s/it]
Overall Progress:  90%|████████▉ | 120/134 [14:18<01:11,  5.09s/it]


Number of images retrieved for hedges: 2




Fetching images for plain: 100%|██████████| 2/2 [00:16<00:00,  8.43s/it]
Overall Progress:  90%|█████████ | 121/134 [14:25<01:13,  5.65s/it]


Number of images retrieved for plain: 2



Fetching images for cave:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for sky: 100%|██████████| 2/2 [00:15<00:00,  7.54s/it]


Overall Progress:  91%|█████████ | 122/134 [14:31<01:11,  5.99s/it]

Fetching images for flowergarden:  50%|█████     | 1/2 [00:00<00:00,  3.36it/s][A[A


Number of images retrieved for sky: 2





Fetching images for grassland:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A



Fetching images for cloud:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A


Fetching images for grassland:  50%|█████     | 1/2 [00:00<00:00,  1.53it/s][A[A[A



Fetching images for cave: 100%|██████████| 2/2 [00:14<00:00,  7.34s/it]
Overall Progress:  92%|█████████▏| 123/134 [14:40<01:14,  6.79s/it]


Number of images retrieved for cave: 2



Fetching images for horizon:   0%|          | 0/2 [00:00<?, ?it/s][A
Fetching images for flowergarden: 100%|██████████| 2/2 [00:15<00:00,  7.81s/it]
Overall Progress:  93%|█████████▎| 124/134 [14:47<01:08,  6.87s/it]


Number of images retrieved for flowergarden: 2


Fetching images for grassland: 100%|██████████| 2/2 [00:16<00:00,  8.22s/it]
Overall Progress:  93%|█████████▎| 125/134 [14:53<00:59,  6.63s/it]


Number of images retrieved for grassland: 2


Fetching images for cloud: 100%|██████████| 2/2 [00:16<00:00,  8.42s/it]
Overall Progress:  94%|█████████▍| 126/134 [14:54<00:38,  4.84s/it]


Number of images retrieved for cloud: 2


Fetching images for horizon: 100%|██████████| 2/2 [00:18<00:00,  9.45s/it]
Overall Progress:  95%|█████████▍| 127/134 [15:02<00:41,  5.93s/it]


Number of images retrieved for horizon: 2



Fetching images for lighthouse:   0%|          | 0/2 [00:00<?, ?it/s][A

Fetching images for glacier:   0%|          | 0/2 [00:00<?, ?it/s][A[A
Fetching images for lighthouse:  50%|█████     | 1/2 [00:02<00:01,  1.99s/it][A

Fetching images for glacier:  50%|█████     | 1/2 [00:02<00:02,  2.12s/it][A[A


Fetching images for savannah:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Fetching images for savannah:  50%|█████     | 1/2 [00:00<00:00,  2.03it/s][A[A[A



Fetching images for valley:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A



Fetching images for valley:  50%|█████     | 1/2 [00:00<00:00,  3.74it/s][A[A[A[A




Fetching images for plateau:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for lighthouse: 100%|██████████| 2/2 [00:15<00:00,  7.89s/it]
Overall Progress:  96%|█████████▌| 128/134 [15:20<00:55,  9.30s/it]


Number of images retrieved for lighthouse: 2


Fetching images for glacier: 100%|██████████| 2/2 [00:16<00:00,  8.22s/it]
Overall Progress:  96%|█████████▋| 129/134 [15:20<00:33,  6.77s/it]


Number of images retrieved for glacier: 2


Fetching images for savannah: 100%|██████████| 2/2 [00:16<00:00,  8.36s/it]
Fetching images for valley: 100%|██████████| 2/2 [00:16<00:00,  8.25s/it]


Number of images retrieved for savannah: 2



Overall Progress:  98%|█████████▊| 131/134 [15:26<00:13,  4.44s/it]


Number of images retrieved for valley: 2


Fetching images for plateau: 100%|██████████| 2/2 [00:14<00:00,  7.08s/it]
Overall Progress:  99%|█████████▊| 132/134 [15:30<00:08,  4.34s/it]


Number of images retrieved for plateau: 2



Fetching images for volcano:   0%|          | 0/2 [00:00<?, ?it/s][A

Fetching images for waterfall:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Fetching images for waterfall:  50%|█████     | 1/2 [00:01<00:01,  1.25s/it][A[A
Fetching images for waterfall: 100%|██████████| 2/2 [00:13<00:00,  6.99s/it]
Fetching images for volcano: 100%|██████████| 2/2 [00:14<00:00,  7.14s/it]



Number of images retrieved for waterfall: 2


Overall Progress: 100%|██████████| 134/134 [15:47<00:00,  7.07s/it]


Number of images retrieved for volcano: 2
Data saved to image_urls.json





In [4]:

class ImageDownloader:
    def __init__(self, json_file, download_dir='Dataset', max_workers=4, delay=1):
        self.json_file = json_file  # File chứa URL của ảnh dưới dạng JSON
        self.download_dir = download_dir  # Tên thư mục để lưu trữ ảnh
        self.max_workers = max_workers  # Số lượng luồng
        self.delay = delay  # Độ trễ lịch sự
        self.filename = set()  # Để lưu trữ các đường dẫn tên file
        self.setup_directory()  # Thiết lập cấu trúc thư mục

    def setup_directory(self):
        if not os.path.exists(self.download_dir):
            os.makedirs(self.download_dir)

    def read_json(self):
        """
        Đọc file JSON và trả về dữ liệu.

        Trả về:
        data (dict): Dữ liệu đọc từ file JSON.
        """
        with open(self.json_file, 'r') as file:
            data = json.load(file)
        return data

    def is_valid_url(self, url):
        """
        Kiểm tra xem URL có hợp lệ không.

        Tham số:
        url (str): URL cần kiểm tra.

        Trả về:
        bool: True nếu URL hợp lệ, False nếu không.
        """
        try:
            with urllib.request.urlopen(url) as response:
                if response.status == 200 and 'image' in response.info().get_content_type():
                    return True
        except Exception:
            return False

    def download_image(self, url, category, term, pbar):
        """
        Tải ảnh từ URL đã cho.

        Tham số:
        url (str): URL của ảnh cần tải.
        category (str): Danh mục của ảnh.
        term (str): Từ khóa liên quan đến ảnh.
        pbar (tqdm): Đối tượng thanh tiến trình.

        Trả về:
        str: Thông báo chỉ ra trạng thái của việc tải.
        """
        if not self.is_valid_url(url):
            pbar.update(1)
            return f"Invalid URL: {url}"

        category_dir = os.path.join(self.download_dir, category)
        if not os.path.exists(category_dir):
            os.makedirs(category_dir)

        term_dir = os.path.join(category_dir, term)
        if not os.path.exists(term_dir):
            os.makedirs(term_dir)

        filename = os.path.join(term_dir, os.path.basename(urlparse(url).path))
        self.filename.add(filename)  # Ghi lại đường dẫn tên file

        try:
            urllib.request.urlretrieve(url, filename)
            pbar.update(1)
            return f"Downloaded: {url}"
        except Exception as e:
            pbar.update(1)
            return f"Failed to download {url}: {str(e)}"

    def download_images(self):
        """
        Tải ảnh từ file JSON.

        Trả về:
        None
        """
        data = self.read_json()
        download_tasks = []

        total_images = sum(len(urls) for terms in data.values() for urls in terms.values())
        with tqdm(total=total_images, desc="Downloading images") as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                for category, terms in data.items():
                    for term, urls in terms.items():
                        for url in urls:
                            download_tasks.append(executor.submit(self.download_image, url, category, term, pbar))
                            time.sleep(self.delay)  # Độ trễ lịch sự

                for future in concurrent.futures.as_completed(download_tasks):
                    print(future.result())

        self.export_filename()

    def export_filename(self):
        """
        Xuất danh sách đường dẫn tên file vào một file text.

        Trả về:
        None
        """
        with open('filename.txt', 'w') as file:
            for filename in sorted(self.filename):
                file.write(f"{filename}\n")


In [5]:
downloader = ImageDownloader(json_file='image_urls.json', download_dir='Dataset', max_workers=4, delay=1)
downloader.download_images()
downloader.export_filename()

Downloading images:  26%|██▌       | 68/266 [04:20<08:16,  2.51s/it]

Downloaded: https://live.staticflickr.com/7259/7577390134_b1cda83627_b.jpg
Downloaded: https://live.staticflickr.com/4437/36367572521_a8d5235e47_b.jpg
Downloaded: https://live.staticflickr.com/7377/9641031199_ea7460e902_b.jpg
Downloaded: https://live.staticflickr.com/5596/14307815484_87d075481a_b.jpg
Downloaded: https://live.staticflickr.com/1962/44791540314_c8ae2861d2_b.jpg
Downloaded: https://live.staticflickr.com/65535/33659529142_2b6c3241eb_b.jpg
Downloaded: https://live.staticflickr.com/2815/9510353068_b0b4f552d3_b.jpg
Downloaded: https://live.staticflickr.com/4269/35172789496_a387d77f5f_b.jpg
Downloaded: https://live.staticflickr.com/6161/6177244431_80f72e56f0_b.jpg
Downloaded: https://live.staticflickr.com/5254/5412377706_d77700fc38_b.jpg
Downloaded: https://live.staticflickr.com/8147/7471104002_169c3077ba_b.jpg
Downloaded: https://live.staticflickr.com/1934/45431020202_88040ae942_b.jpg
Downloaded: https://live.staticflickr.com/3506/3194806745_1465ae0373_b.jpg
Downloaded: https:

Downloading images:  26%|██▌       | 69/266 [04:28<12:47,  3.90s/it]

Downloaded: https://live.staticflickr.com/7446/26509038664_a8730f7bf4_b.jpg


Downloading images:  26%|██▋       | 70/266 [04:35<15:01,  4.60s/it]

Downloaded: https://live.staticflickr.com/65535/52750533076_fdfc390fcb_b.jpg


Downloading images:  27%|██▋       | 71/266 [04:41<16:15,  5.00s/it]

Downloaded: https://live.staticflickr.com/3099/3194860899_640ce169ac_b.jpg
Downloaded: https://live.staticflickr.com/4141/4871150060_c7e5e5175e_b.jpg


Downloading images:  27%|██▋       | 73/266 [04:43<10:40,  3.32s/it]

Downloaded: https://live.staticflickr.com/1189/561670551_efa5f9ae72_b.jpg


Downloading images:  28%|██▊       | 74/266 [04:45<09:19,  2.92s/it]

Downloaded: https://live.staticflickr.com/3792/10314690654_7eb5f7691e_b.jpg


Downloading images:  28%|██▊       | 75/266 [04:48<09:31,  2.99s/it]

Downloaded: https://live.staticflickr.com/5537/14142817779_568406cd86_b.jpg


Downloading images:  29%|██▊       | 76/266 [04:50<08:58,  2.83s/it]

Downloaded: https://live.staticflickr.com/1662/23809746334_43928c099f_b.jpg


Downloading images:  29%|██▉       | 77/266 [04:56<11:06,  3.52s/it]

Downloaded: https://live.staticflickr.com/6082/6078011473_8ef433ea18_b.jpg


Downloading images:  29%|██▉       | 78/266 [04:57<08:51,  2.83s/it]

Downloaded: https://live.staticflickr.com/4426/36551879530_8588628ff8_b.jpg


Downloading images:  30%|██▉       | 79/266 [04:57<06:32,  2.10s/it]

Downloaded: https://live.staticflickr.com/3182/2886688671_31bc680137_b.jpg


Downloading images:  30%|███       | 80/266 [04:59<06:30,  2.10s/it]

Downloaded: https://live.staticflickr.com/5452/30094682741_1d471f706b_b.jpg


Downloading images:  30%|███       | 81/266 [05:11<14:55,  4.84s/it]

Downloaded: https://live.staticflickr.com/2377/2285077373_bbc137476f_b.jpg


Downloading images:  31%|███       | 82/266 [05:15<14:42,  4.80s/it]

Downloaded: https://live.staticflickr.com/2685/4489852142_28eb56feec_b.jpg


Downloading images:  31%|███       | 83/266 [05:19<13:22,  4.38s/it]

Downloaded: https://live.staticflickr.com/5143/5644554429_82c04f547c_b.jpg


Downloading images:  32%|███▏      | 84/266 [05:20<10:26,  3.45s/it]

Downloaded: https://live.staticflickr.com/4147/4977427214_d2c3f22c3c_b.jpg


Downloading images:  32%|███▏      | 85/266 [05:24<10:30,  3.48s/it]

Downloaded: https://live.staticflickr.com/4129/4838389045_d5fc08ca2b_b.jpg


Downloading images:  32%|███▏      | 86/266 [05:27<10:35,  3.53s/it]

Downloaded: https://live.staticflickr.com/933/42144751990_cb2c7b335a_b.jpg


Downloading images:  33%|███▎      | 87/266 [05:28<07:59,  2.68s/it]

Downloaded: https://live.staticflickr.com/7318/9867033144_32a443834a_b.jpg


Downloading images:  33%|███▎      | 88/266 [05:32<08:47,  2.97s/it]

Downloaded: https://live.staticflickr.com/918/42572640954_487b61c1c0_b.jpg


Downloading images:  33%|███▎      | 89/266 [05:34<08:29,  2.88s/it]

Downloaded: https://live.staticflickr.com/2919/33846692900_89016be5be_b.jpg


Downloading images:  34%|███▍      | 90/266 [05:39<09:56,  3.39s/it]

Downloaded: https://live.staticflickr.com/5568/14759103752_ffde6debfc_b.jpg


Downloading images:  34%|███▍      | 91/266 [05:49<15:57,  5.47s/it]

Downloaded: https://live.staticflickr.com/2064/2149660950_8c21ccda8b_b.jpg


Downloading images:  35%|███▍      | 92/266 [05:49<11:17,  3.90s/it]

Downloaded: https://live.staticflickr.com/3262/2720988253_ed44346067_b.jpg


Downloading images:  35%|███▍      | 93/266 [05:51<09:02,  3.14s/it]

Downloaded: https://live.staticflickr.com/1702/26141463766_4feb75d3c7_b.jpg


Downloading images:  35%|███▌      | 94/266 [06:02<15:36,  5.45s/it]

Downloaded: https://live.staticflickr.com/1613/25987709946_ddd7534993_b.jpg


Downloading images:  36%|███▌      | 95/266 [06:11<18:46,  6.59s/it]

Downloaded: https://live.staticflickr.com/8323/8446750937_bd821ab062_b.jpg


Downloading images:  36%|███▌      | 96/266 [06:13<14:41,  5.19s/it]

Downloaded: https://live.staticflickr.com/7849/32097994667_10d3dda371_b.jpg


Downloading images:  36%|███▋      | 97/266 [06:15<12:07,  4.31s/it]

Downloaded: https://live.staticflickr.com/647/21982802134_e4c135c7f8_b.jpg


Downloading images:  37%|███▋      | 98/266 [06:16<09:12,  3.29s/it]

Downloaded: https://live.staticflickr.com/8299/7995427205_d0e80f29d2_b.jpg


Downloading images:  37%|███▋      | 99/266 [06:16<06:42,  2.41s/it]

Downloaded: https://live.staticflickr.com/3779/11530597946_b8a3eb4e3b_b.jpg


Downloading images:  38%|███▊      | 100/266 [06:18<06:06,  2.21s/it]

Downloaded: https://live.staticflickr.com/65535/50504682357_72e8e99316_b.jpg


Downloading images:  38%|███▊      | 101/266 [06:21<06:57,  2.53s/it]

Downloaded: https://live.staticflickr.com/8589/16104491773_9cc84cfefc_b.jpg


Downloading images:  38%|███▊      | 102/266 [06:23<05:56,  2.18s/it]

Downloaded: https://live.staticflickr.com/65535/49881165307_d4d0a265ff_b.jpg
Downloaded: https://live.staticflickr.com/65535/53860888965_68d9a61fd4_b.jpg


Downloading images:  39%|███▉      | 104/266 [06:26<05:19,  1.97s/it]

Downloaded: https://live.staticflickr.com/65535/52129980137_f64487da33_b.jpg


Downloading images:  39%|███▉      | 105/266 [06:37<10:56,  4.08s/it]

Downloaded: https://live.staticflickr.com/2777/4383818501_4edf833c67_b.jpg


Downloading images:  40%|███▉      | 106/266 [06:38<09:02,  3.39s/it]

Downloaded: https://live.staticflickr.com/65535/52940370825_f4c3be697d_b.jpg


Downloading images:  40%|████      | 107/266 [06:42<09:37,  3.63s/it]

Downloaded: https://live.staticflickr.com/3559/3446613250_8569b7d582_b.jpg


Downloading images:  41%|████      | 108/266 [06:42<07:02,  2.68s/it]

Downloaded: https://live.staticflickr.com/4005/4300636744_3dcb938a25_b.jpg


Downloading images:  41%|████      | 109/266 [06:44<06:30,  2.49s/it]

Downloaded: https://live.staticflickr.com/65535/51178114358_63f13eef54_b.jpg


Downloading images:  41%|████▏     | 110/266 [06:46<05:36,  2.16s/it]

Downloaded: https://live.staticflickr.com/65535/52989500127_b636f0b2c4_b.jpg


Downloading images:  42%|████▏     | 111/266 [06:51<07:55,  3.07s/it]

Downloaded: https://live.staticflickr.com/3847/14882452851_009c72bfaf_b.jpg


Downloading images:  42%|████▏     | 112/266 [06:52<06:27,  2.52s/it]

Downloaded: https://live.staticflickr.com/1408/846238617_97291a900b_b.jpg


Downloading images:  42%|████▏     | 113/266 [06:57<08:02,  3.15s/it]

Downloaded: https://live.staticflickr.com/2781/4423005169_ae1951a079_b.jpg
Downloaded: https://live.staticflickr.com/65535/52043837088_404b2db6d4_b.jpg


Downloading images:  43%|████▎     | 115/266 [06:59<05:36,  2.23s/it]

Downloaded: https://live.staticflickr.com/65535/52886063715_b64e90d6a7_b.jpg


Downloading images:  44%|████▎     | 116/266 [07:06<08:16,  3.31s/it]

Downloaded: https://live.staticflickr.com/7013/6655052549_c86b56ccc3_b.jpg


Downloading images:  44%|████▍     | 117/266 [07:17<13:18,  5.36s/it]

Downloaded: https://live.staticflickr.com/65535/52814399290_d1a5d6d8b5_b.jpg


Downloading images:  44%|████▍     | 118/266 [07:21<12:15,  4.97s/it]

Downloaded: https://live.staticflickr.com/7491/15139193753_df9f192c11_b.jpg


Downloading images:  45%|████▍     | 119/266 [07:22<09:49,  4.01s/it]

Downloaded: https://live.staticflickr.com/436/18164194144_a1924236fa_b.jpg


Downloading images:  45%|████▌     | 120/266 [07:25<08:47,  3.61s/it]

Downloaded: https://live.staticflickr.com/15/19091229_1f7fe28b53_b.jpg


Downloading images:  45%|████▌     | 121/266 [07:26<07:09,  2.96s/it]

Downloaded: https://live.staticflickr.com/65535/52371076093_302829e04a_b.jpg


Downloading images:  46%|████▌     | 122/266 [07:28<06:16,  2.62s/it]

Downloaded: https://live.staticflickr.com/65535/51970555752_3d146b592d_b.jpg


Downloading images:  46%|████▌     | 123/266 [07:29<04:51,  2.04s/it]

Downloaded: https://live.staticflickr.com/65535/51839293870_b1aed56ee9.jpg


Downloading images:  47%|████▋     | 124/266 [07:39<10:16,  4.34s/it]

Downloaded: https://live.staticflickr.com/4220/35220834766_5d1680c918_b.jpg


Downloading images:  47%|████▋     | 125/266 [07:41<08:49,  3.75s/it]

Downloaded: https://live.staticflickr.com/3459/3391070711_c52d1d8d18_b.jpg


Downloading images:  47%|████▋     | 126/266 [07:44<08:00,  3.43s/it]

Downloaded: https://live.staticflickr.com/8374/8591674907_6fb71ee4ac_b.jpg


Downloading images:  48%|████▊     | 127/266 [07:44<05:58,  2.58s/it]

Downloaded: https://live.staticflickr.com/65535/51746688832_4380a61544_b.jpg


Downloading images:  48%|████▊     | 128/266 [07:45<04:47,  2.08s/it]

Downloaded: https://live.staticflickr.com/5287/5248099209_90ed5d971f_b.jpg


Downloading images:  48%|████▊     | 129/266 [07:54<09:30,  4.17s/it]

Downloaded: https://live.staticflickr.com/2644/3929675039_c06b6a9440_b.jpg


Downloading images:  49%|████▉     | 130/266 [08:03<12:19,  5.44s/it]

Downloaded: https://live.staticflickr.com/4709/39879980494_e6b17ddba6_b.jpg


Downloading images:  49%|████▉     | 131/266 [08:04<09:26,  4.20s/it]

Downloaded: https://live.staticflickr.com/7056/6822877094_7a36b955ef_b.jpg


Downloading images:  50%|████▉     | 132/266 [08:07<08:29,  3.80s/it]

Downloaded: https://live.staticflickr.com/3601/3572306382_5d8a339b7a_b.jpg


Downloading images:  50%|█████     | 133/266 [08:12<09:22,  4.23s/it]

Downloaded: https://live.staticflickr.com/65535/53742600482_be65dab11c_b.jpg


Downloading images:  50%|█████     | 134/266 [08:14<07:42,  3.51s/it]

Downloaded: https://live.staticflickr.com/3372/3423277348_61ee6880bb_b.jpg
Downloaded: https://live.staticflickr.com/879/41845529985_04513daa36_b.jpg


Downloading images:  51%|█████     | 136/266 [08:21<07:32,  3.48s/it]

Downloaded: https://live.staticflickr.com/8233/8430302275_0d5f2b8693_b.jpg


Downloading images:  52%|█████▏    | 137/266 [08:23<06:58,  3.24s/it]

Downloaded: https://live.staticflickr.com/3485/3276025446_cacfc8b583_b.jpg


Downloading images:  52%|█████▏    | 138/266 [08:24<05:19,  2.50s/it]

Downloaded: https://live.staticflickr.com/7013/6655052549_c86b56ccc3_b.jpg
Downloaded: https://live.staticflickr.com/15/19091229_1f7fe28b53_b.jpg


Downloading images:  53%|█████▎    | 140/266 [08:29<05:28,  2.61s/it]

Downloaded: https://live.staticflickr.com/65535/53055625804_808a736425_b.jpg


Downloading images:  53%|█████▎    | 141/266 [08:36<07:36,  3.65s/it]

Downloaded: https://live.staticflickr.com/65535/51174618507_92b824442a_b.jpg


Downloading images:  53%|█████▎    | 142/266 [08:40<07:45,  3.75s/it]

Downloaded: https://live.staticflickr.com/8604/16346019857_8c8e320ea0_b.jpg


Downloading images:  54%|█████▍    | 143/266 [08:41<06:06,  2.98s/it]

Downloaded: https://live.staticflickr.com/7029/27122090776_c88a948698_b.jpg


Downloading images:  54%|█████▍    | 144/266 [08:42<04:34,  2.25s/it]

Downloaded: https://live.staticflickr.com/3035/2480337102_9e9e97b60f_b.jpg


Downloading images:  55%|█████▍    | 145/266 [08:43<04:08,  2.06s/it]

Downloaded: https://live.staticflickr.com/65535/48666722771_02e68f1941_b.jpg


Downloading images:  55%|█████▍    | 146/266 [08:46<04:44,  2.37s/it]

Downloaded: https://live.staticflickr.com/619/21565496366_6c44efae27_b.jpg


Downloading images:  56%|█████▌    | 148/266 [08:47<02:45,  1.40s/it]

Downloaded: https://live.staticflickr.com/3200/2782424667_a569603166_b.jpg
Downloaded: https://live.staticflickr.com/5781/22116589925_30e0d478b2_b.jpg


Downloading images:  56%|█████▌    | 149/266 [08:49<02:47,  1.43s/it]

Downloaded: https://live.staticflickr.com/65535/51182093434_0961959762_b.jpg


Downloading images:  56%|█████▋    | 150/266 [08:49<02:13,  1.15s/it]

Downloaded: https://live.staticflickr.com/5751/22607587331_584a1e165b_b.jpg


Downloading images:  57%|█████▋    | 151/266 [08:50<02:09,  1.13s/it]

Downloaded: https://live.staticflickr.com/65535/52933773027_690f6af44c_b.jpg


Downloading images:  57%|█████▋    | 152/266 [08:51<01:45,  1.08it/s]

Downloaded: https://live.staticflickr.com/614/21975394393_53e12c47d1_b.jpg


Downloading images:  58%|█████▊    | 153/266 [08:51<01:25,  1.33it/s]

Downloaded: https://live.staticflickr.com/65535/52808462909_e87c8a65c6_b.jpg


Downloading images:  58%|█████▊    | 154/266 [08:51<01:08,  1.63it/s]

Downloaded: https://live.staticflickr.com/65535/50083467973_4f65969efd_b.jpg


Downloading images:  58%|█████▊    | 155/266 [08:52<01:03,  1.75it/s]

Downloaded: https://live.staticflickr.com/5668/22498102696_0d662a2d5f_b.jpg


Downloading images:  59%|█████▊    | 156/266 [08:53<01:26,  1.26it/s]

Downloaded: https://live.staticflickr.com/65535/52775614766_3d7715cff2_b.jpg


Downloading images:  59%|█████▉    | 157/266 [08:53<01:08,  1.60it/s]

Downloaded: https://live.staticflickr.com/4061/4394155973_252b15341d_b.jpg


Downloading images:  59%|█████▉    | 158/266 [08:54<01:09,  1.55it/s]

Downloaded: https://live.staticflickr.com/8781/17498256752_768f01e02c_b.jpg


Downloading images:  60%|█████▉    | 159/266 [08:54<00:59,  1.81it/s]

Downloaded: https://live.staticflickr.com/5150/5791151895_67dbb05cdf_b.jpg


Downloading images:  60%|██████    | 160/266 [08:56<01:22,  1.28it/s]

Downloaded: https://live.staticflickr.com/65535/51088815079_18ef76a98b_b.jpg


Downloading images:  61%|██████    | 161/266 [08:58<02:10,  1.24s/it]

Downloaded: https://live.staticflickr.com/65535/53722444476_164d86669a_b.jpg


Downloading images:  61%|██████    | 162/266 [08:59<02:03,  1.19s/it]

Downloaded: https://live.staticflickr.com/4446/37969242581_ced9851653_b.jpg


Downloading images:  61%|██████▏   | 163/266 [09:01<02:24,  1.40s/it]

Downloaded: https://live.staticflickr.com/65535/53067123696_28d7aa67c6_b.jpg


Downloading images:  62%|██████▏   | 164/266 [09:02<02:22,  1.40s/it]

Downloaded: https://live.staticflickr.com/4188/34282669112_854cf2eb57_b.jpg


Downloading images:  62%|██████▏   | 165/266 [09:03<02:05,  1.25s/it]

Downloaded: https://live.staticflickr.com/65535/53359359452_5ffae7e093_b.jpg


Downloading images:  62%|██████▏   | 166/266 [09:04<01:37,  1.02it/s]

Downloaded: https://live.staticflickr.com/7773/26953848586_89bab69e89_b.jpg


Downloading images:  63%|██████▎   | 167/266 [09:05<01:40,  1.01s/it]

Downloaded: https://live.staticflickr.com/1733/41569719005_67551b978a_b.jpg
Downloaded: https://live.staticflickr.com/3167/2562952041_fbd4b397b5_b.jpg


Downloading images:  64%|██████▎   | 169/266 [09:05<00:58,  1.66it/s]

Downloaded: https://live.staticflickr.com/4116/4764724998_21d90eaebb_b.jpg


Downloading images:  64%|██████▍   | 170/266 [09:05<00:51,  1.86it/s]

Downloaded: https://live.staticflickr.com/5025/5591703036_7ed228c5e3_b.jpg


Downloading images:  65%|██████▍   | 172/266 [09:06<00:42,  2.19it/s]

Downloaded: https://live.staticflickr.com/65535/53716014803_3344904fea_b.jpg
Downloaded: https://live.staticflickr.com/65535/51035833742_c9692e70ba_b.jpg


Downloading images:  65%|██████▌   | 173/266 [09:07<00:43,  2.15it/s]

Downloaded: https://live.staticflickr.com/2731/4128271544_6ff5b3fdec_b.jpg


Downloading images:  65%|██████▌   | 174/266 [09:07<00:36,  2.55it/s]

Downloaded: https://live.staticflickr.com/65535/52189470994_7d7aa5081e_b.jpg


Downloading images:  66%|██████▌   | 175/266 [09:07<00:39,  2.31it/s]

Downloaded: https://live.staticflickr.com/4200/34711514806_f0fab551d0_b.jpg


Downloading images:  66%|██████▌   | 176/266 [09:08<00:35,  2.52it/s]

Downloaded: https://live.staticflickr.com/65535/52831539074_bf5bb488db_b.jpg


Downloading images:  68%|██████▊   | 180/266 [09:08<00:20,  4.16it/s]

Downloaded: https://live.staticflickr.com/65535/49456860741_5b2064a7e4_b.jpg
Downloaded: https://live.staticflickr.com/3057/2884135028_81da01934a_b.jpg
Downloaded: https://live.staticflickr.com/65535/52766858427_c6a714b52b_b.jpg
Downloaded: https://live.staticflickr.com/2039/2225401898_80a74f4066_b.jpg


Downloading images:  68%|██████▊   | 181/266 [09:09<00:23,  3.55it/s]

Downloaded: https://live.staticflickr.com/53/147482645_d125c7f1f8_b.jpg


Downloading images:  69%|██████▉   | 183/266 [09:09<00:20,  4.06it/s]

Downloaded: https://live.staticflickr.com/3672/13211936983_a8d62abc85_b.jpg
Downloaded: https://live.staticflickr.com/2904/14220902143_096bc108d1_b.jpg


Downloading images:  69%|██████▉   | 184/266 [09:10<00:24,  3.41it/s]

Downloaded: https://live.staticflickr.com/2535/3875424658_d824fdfc3d_b.jpg


Downloading images:  70%|██████▉   | 185/266 [09:10<00:23,  3.40it/s]

Downloaded: https://live.staticflickr.com/3740/13212246875_b31b94918d_b.jpg


Downloading images:  70%|███████   | 187/266 [09:10<00:19,  4.08it/s]

Downloaded: https://live.staticflickr.com/3518/4563857816_9accb1a2f1_b.jpg
Downloaded: https://live.staticflickr.com/7907/46920425501_8eee0b252c_b.jpg


Downloading images:  71%|███████   | 188/266 [09:11<00:16,  4.64it/s]

Downloaded: https://live.staticflickr.com/2275/5751096919_6e389919d0_b.jpg


Downloading images:  71%|███████   | 189/266 [09:11<00:21,  3.61it/s]

Downloaded: https://live.staticflickr.com/227/469530755_57382d49c6_b.jpg


Downloading images:  72%|███████▏  | 191/266 [09:11<00:18,  4.07it/s]

Downloaded: https://live.staticflickr.com/2471/3918659599_4f87a05e82_b.jpg
Downloaded: https://live.staticflickr.com/3090/2843336762_5d02117beb_b.jpg


Downloading images:  72%|███████▏  | 192/266 [09:12<00:16,  4.50it/s]

Downloaded: https://live.staticflickr.com/8029/8007062220_81b9630b31_b.jpg


Downloading images:  73%|███████▎  | 194/266 [09:12<00:15,  4.78it/s]

Downloaded: https://live.staticflickr.com/6058/6219293067_bddc9c5f79_b.jpg
Downloaded: https://live.staticflickr.com/7643/16795616287_f1738d2cf0_b.jpg


Downloading images:  73%|███████▎  | 195/266 [09:12<00:13,  5.10it/s]

Downloaded: https://live.staticflickr.com/1091/1208805552_f35a9540a5_b.jpg


Downloading images:  74%|███████▎  | 196/266 [09:13<00:16,  4.17it/s]

Downloaded: https://live.staticflickr.com/3430/3370561778_8e53c343e4_b.jpg


Downloading images:  74%|███████▍  | 197/266 [09:13<00:24,  2.87it/s]

Downloaded: https://live.staticflickr.com/1251/1176750453_945d7188c1_b.jpg
Downloaded: https://live.staticflickr.com/3552/3347436478_71d3a3a4d6_b.jpg


Downloading images:  75%|███████▍  | 199/266 [09:14<00:18,  3.55it/s]

Downloaded: https://live.staticflickr.com/4118/5438820238_2f32a5dc00_b.jpg


Downloading images:  75%|███████▌  | 200/266 [09:14<00:20,  3.26it/s]

Downloaded: https://live.staticflickr.com/5300/5486645757_60c2f34bfd_b.jpg


Downloading images:  76%|███████▌  | 201/266 [09:14<00:20,  3.24it/s]

Downloaded: https://live.staticflickr.com/8315/27882103054_9d980562e7_b.jpg


Downloading images:  76%|███████▋  | 203/266 [09:15<00:15,  3.95it/s]

Downloaded: https://live.staticflickr.com/41/127297137_651bcf78e2_b.jpg
Downloaded: https://live.staticflickr.com/22/34286436_40d483cfe5_b.jpg


Downloading images:  77%|███████▋  | 204/266 [09:15<00:18,  3.31it/s]

Downloaded: https://live.staticflickr.com/2464/3901745106_4f4c83fb58_b.jpg


Downloading images:  77%|███████▋  | 206/266 [09:15<00:14,  4.05it/s]

Downloaded: https://live.staticflickr.com/1396/834072578_4c62399dba_b.jpg
Downloaded: https://live.staticflickr.com/90/270081182_92483ed242_b.jpg
Downloaded: https://live.staticflickr.com/7187/6932304003_e9202a260f_b.jpg


Downloading images:  78%|███████▊  | 208/266 [09:16<00:12,  4.77it/s]

Downloaded: https://live.staticflickr.com/5244/13988243986_1601953042_b.jpg


Downloading images:  79%|███████▊  | 209/266 [09:16<00:13,  4.25it/s]

Downloaded: https://live.staticflickr.com/99/256998822_2fae4fc222_b.jpg


Downloading images:  79%|███████▉  | 210/266 [09:17<00:18,  2.98it/s]

Downloaded: https://live.staticflickr.com/2494/3763924924_4ca4ce3dba_b.jpg


Downloading images:  79%|███████▉  | 211/266 [09:17<00:16,  3.33it/s]

Downloaded: https://live.staticflickr.com/5204/5378968848_44ee2f48d3_b.jpg


Downloading images:  80%|███████▉  | 212/266 [09:17<00:15,  3.54it/s]

Downloaded: https://live.staticflickr.com/65535/52903297831_28f5aa259b_b.jpg
Downloaded: https://live.staticflickr.com/8194/8423026632_a8276b0a6b_b.jpg


Downloading images:  81%|████████  | 215/266 [09:18<00:13,  3.79it/s]

Downloaded: https://live.staticflickr.com/598/31923282305_2ca8fcdbe8_b.jpg
Downloaded: https://live.staticflickr.com/140/399481269_ea15c25c9b_b.jpg


Downloading images:  82%|████████▏ | 217/266 [09:18<00:09,  5.24it/s]

Downloaded: https://live.staticflickr.com/1131/5158037191_d6b7011222_b.jpg
Downloaded: https://live.staticflickr.com/7162/6645754627_b5a473a34f_b.jpg


Downloading images:  83%|████████▎ | 220/266 [09:19<00:10,  4.53it/s]

Downloaded: https://live.staticflickr.com/8526/8453773092_123dd5a5b1_b.jpg
Downloaded: https://live.staticflickr.com/65535/47005082774_9cffecc055.jpg
Downloaded: https://live.staticflickr.com/1702/25718576345_72b7517dc2_b.jpg
Downloaded: https://live.staticflickr.com/1849/42992447700_9e6180fd19_b.jpg


Downloading images:  84%|████████▍ | 224/266 [09:20<00:07,  5.25it/s]

Downloaded: https://live.staticflickr.com/8245/8674348283_78024e360a_b.jpg
Downloaded: https://live.staticflickr.com/373/20375837811_3cec1e1df9_b.jpg
Downloaded: https://live.staticflickr.com/388/31823790605_516895fa67_b.jpg


Downloading images:  85%|████████▍ | 225/266 [09:20<00:10,  4.07it/s]

Downloaded: https://live.staticflickr.com/2914/13985786579_32aa81aca4_b.jpg


Downloading images:  85%|████████▌ | 227/266 [09:21<00:08,  4.42it/s]

Downloaded: https://live.staticflickr.com/8189/8103072177_a1bd831fa2_b.jpg
Downloaded: https://live.staticflickr.com/65535/51095091688_17e448ddb6_b.jpg
Downloaded: https://live.staticflickr.com/4045/4378269263_e40fc012a8_b.jpg


Downloading images:  86%|████████▌ | 229/266 [09:21<00:07,  4.88it/s]

Downloaded: https://live.staticflickr.com/3649/3957799715_5407ce0750_b.jpg


Downloading images:  86%|████████▋ | 230/266 [09:21<00:08,  4.23it/s]

Downloaded: https://live.staticflickr.com/65535/53787978252_88d56778e4_b.jpg


Downloading images:  87%|████████▋ | 232/266 [09:22<00:07,  4.82it/s]

Downloaded: https://live.staticflickr.com/65535/52712003999_cba6904a86_b.jpg
Downloaded: https://live.staticflickr.com/7252/7747441598_7fcaf25025_b.jpg


Downloading images:  88%|████████▊ | 233/266 [09:22<00:08,  4.04it/s]

Downloaded: https://live.staticflickr.com/2424/3793337820_7be27c5344_b.jpg
Downloaded: https://live.staticflickr.com/65535/53676817514_4218932174_b.jpg


Downloading images:  89%|████████▊ | 236/266 [09:22<00:05,  5.34it/s]

Downloaded: https://live.staticflickr.com/4555/24824364988_1c0da9f029_b.jpg
Downloaded: https://live.staticflickr.com/3867/15380757806_32dd69f03f_b.jpg
Downloaded: https://live.staticflickr.com/5560/14952130995_f4837ac1f0_b.jpg


Downloading images:  90%|████████▉ | 239/266 [09:23<00:04,  6.21it/s]

Downloaded: https://live.staticflickr.com/220/520164687_dcb0b78f96_b.jpg
Downloaded: https://live.staticflickr.com/5004/5206158590_33872e6b34_b.jpg


Downloading images:  91%|█████████ | 242/266 [09:23<00:03,  7.43it/s]

Downloaded: https://live.staticflickr.com/2839/9581144080_3613848798_b.jpg
Downloaded: https://live.staticflickr.com/5695/21074580911_432ce0beb2_b.jpg
Downloaded: https://live.staticflickr.com/8270/8988751362_70acb6648c_b.jpg


Downloading images:  92%|█████████▏| 245/266 [09:24<00:03,  6.39it/s]

Downloaded: https://live.staticflickr.com/4068/4487309255_22889018bc_b.jpg
Downloaded: https://live.staticflickr.com/3706/13735742764_31843f6e46_b.jpg
Downloaded: https://live.staticflickr.com/485/31643806045_8bc983e054_b.jpg


Downloading images:  92%|█████████▏| 246/266 [09:24<00:04,  4.83it/s]

Downloaded: https://live.staticflickr.com/60/200329501_5541f6e65c_b.jpg
Downloaded: https://live.staticflickr.com/65535/48065021438_fc467a812a_b.jpg


Downloading images:  93%|█████████▎| 248/266 [09:25<00:03,  4.79it/s]

Downloaded: https://live.staticflickr.com/460/19250957526_6564b457c5_b.jpg
Downloaded: https://live.staticflickr.com/65535/52256587492_ba1ddcdb4c_b.jpg


Downloading images:  94%|█████████▍| 251/266 [09:25<00:02,  5.67it/s]

Downloaded: https://live.staticflickr.com/3792/8966548467_312f2ed006_b.jpg
Downloaded: https://live.staticflickr.com/630/20547899773_556b94d7d3_b.jpg
Downloaded: https://live.staticflickr.com/3305/3570797337_894a2d9350_b.jpg


Downloading images:  95%|█████████▌| 253/266 [09:25<00:02,  6.05it/s]

Downloaded: https://live.staticflickr.com/8361/8306916640_e667cd14b2_b.jpg


Downloading images:  97%|█████████▋| 257/266 [09:26<00:01,  7.48it/s]

Downloaded: https://live.staticflickr.com/8485/8178686242_046a860c0b.jpg
Downloaded: https://live.staticflickr.com/5552/15136861338_0f8d0ca0af_b.jpg
Downloaded: https://live.staticflickr.com/6069/6106893415_4ce81c6610_b.jpg
Downloaded: https://live.staticflickr.com/5555/14841498385_b6c4fd86ab_b.jpg


Downloading images:  97%|█████████▋| 258/266 [09:26<00:01,  6.41it/s]

Downloaded: https://live.staticflickr.com/6074/6117189965_e026487db2_b.jpg


Downloading images:  98%|█████████▊| 260/266 [09:27<00:01,  5.98it/s]

Downloaded: https://live.staticflickr.com/2679/4384395986_fcd011d5e1_b.jpg
Downloaded: https://live.staticflickr.com/747/22288603600_ac3080cab3_b.jpg


Downloading images:  98%|█████████▊| 261/266 [09:27<00:01,  3.84it/s]

Downloaded: https://live.staticflickr.com/4628/39700034572_76878a296d_b.jpg


Downloading images:  98%|█████████▊| 262/266 [09:27<00:01,  3.49it/s]

Downloaded: https://live.staticflickr.com/65535/49629211101_d46f735055.jpg
Downloaded: https://live.staticflickr.com/1380/5153065721_d56cf6c561_b.jpg


Downloading images: 100%|█████████▉| 265/266 [09:28<00:00,  4.86it/s]

Downloaded: https://live.staticflickr.com/1560/24635030261_def899db51_b.jpg
Downloaded: https://live.staticflickr.com/5258/5489755530_b0596ef230_b.jpg


Downloading images: 100%|██████████| 266/266 [09:28<00:00,  2.14s/it]

Downloaded: https://live.staticflickr.com/65535/53671954433_c45be77a79_b.jpg





In [6]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive/')

# Function to check and preprocess images
def check_and_preprocess_images(image_dir):
    """
    Check and preprocess images in the specified directory.

    Parameters:
    image_dir (str): The directory containing the images to be checked and preprocessed.

    Returns:
    None
    """
    for root, _, files in os.walk(image_dir):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    # Check if image is smaller than 50x50 pixels
                    if img.size[0] < 50 or img.size[1] < 50:
                        os.remove(file_path)
                        print(f"Deleted {file_path}: Image too small ({img.size[0]}x{img.size[1]})")
                        continue

                    # Convert non-RGB images to RGB
                    if img.mode != 'RGB':
                        img = img.convert('RGB')
                        img.save(file_path)
                        print(f"Converted {file_path} to RGB")

            except Exception as e:
                # If file is not an image, delete it
                os.remove(file_path)
                print(f"Deleted {file_path}: Not an image or corrupted file ({str(e)})")

# Call the function to check and preprocess images in the 'Dataset' directory
check_and_preprocess_images('Dataset')

# Zip the processed dataset and save it to Google Drive
!zip -r /content/drive/MyDrive/Clean_Dataset.zip Dataset


Mounted at /content/drive/
Converted Dataset/animal/Cheetah/4838389045_d5fc08ca2b_b.jpg to RGB
Converted Dataset/scenery/desert/52712003999_cba6904a86_b.jpg to RGB
Converted Dataset/scenery/volcano/49629211101_d46f735055.jpg to RGB
  adding: Dataset/ (stored 0%)
  adding: Dataset/animal/ (stored 0%)
  adding: Dataset/animal/horse/ (stored 0%)
  adding: Dataset/animal/horse/38183707732_f4ca7d3b43_b.jpg (deflated 0%)
  adding: Dataset/animal/horse/5412377706_d77700fc38_b.jpg (deflated 1%)
  adding: Dataset/animal/Bee/ (stored 0%)
  adding: Dataset/animal/Bee/14142817779_568406cd86_b.jpg (deflated 0%)
  adding: Dataset/animal/Bee/23809746334_43928c099f_b.jpg (deflated 0%)
  adding: Dataset/animal/bird/ (stored 0%)
  adding: Dataset/animal/bird/21709064798_67e964516d_b.jpg (deflated 0%)
  adding: Dataset/animal/bird/28761726521_6aee5e6315_b.jpg (deflated 0%)
  adding: Dataset/animal/Snail/ (stored 0%)
  adding: Dataset/animal/Snail/6662983427_4f10f9687a_b.jpg (deflated 0%)
  adding: Datase

In [7]:
# Download the file using gdown
!gdown --id 1--6fe48D9ydnTpLV1GKKqJ0pqpOXB3z_

# Unzip the downloaded file
!unzip Clean_Dataset.zip

# Import necessary modules
import os
import shutil
from collections import defaultdict

# Define the source and target directories
source_dir = "Dataset"
train_dir = "data/train"
test_dir = "data/test"

# Create the target directories if they don’t exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Initialize a dictionary to hold file paths for each class
class_files = defaultdict(list)

# Read the file paths from the text file
with open('filename.txt', 'r') as file:
    lines = file.readlines()
    for line in lines:
        line = line.strip()
        if line:
            # Extract the class name from the path
            parts = line.split('/')
            class_name = parts[2]  # Structure: Dataset/category/class/image.jpg
            class_files[class_name].append(line)

# Move images to the train and test directories
for class_name, files in class_files.items():
    # Create the train and test directories for the class
    train_class_dir = os.path.join(train_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Move 19 images to train and 1 image to test
    for i, file_path in enumerate(files):
        if i == 0:
            shutil.copy(file_path, test_class_dir)
        elif i < 20:
            shutil.copy(file_path, train_class_dir)

print("Dataset organization complete!")


Downloading...
From (original): https://drive.google.com/uc?id=1--6fe48D9ydnTpLV1GKKqJ0pqpOXB3z_
From (redirected): https://drive.google.com/uc?id=1--6fe48D9ydnTpLV1GKKqJ0pqpOXB3z_&confirm=t&uuid=bbbf8f2d-7329-4239-a56a-41280e1ed964
To: /content/Clean_Dataset.zip
100% 483M/483M [00:14<00:00, 33.9MB/s]
Archive:  Clean_Dataset.zip
replace Dataset/plant/Clove/53742600482_be65dab11c_b.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: Dataset/plant/Clove/53742600482_be65dab11c_b.jpg  
  inflating: Dataset/plant/Clove/6499672315_313ec22f51_b.jpg  
  inflating: Dataset/plant/Clove/22440611562_e293ed2cb6_b.jpg  
  inflating: Dataset/plant/Clove/46819415374_03d9b1b5b4_b.jpg  
  inflating: Dataset/plant/Clove/16282799783_cd1a50bdb9_b.jpg  
  inflating: Dataset/plant/Clove/217346923_f48383a640_b.jpg  
  inflating: Dataset/plant/Clove/8430302275_0d5f2b8693_b.jpg  
  inflating: Dataset/plant/Clove/33921765462_44448ddcda_b.jpg  
  inflating: Dataset/plant/Clove/51460352459_ac3eff9325_b.jpg  