In [6]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from collections import Counter
import joblib

# Directory for the dataset
data_dir = "/Users/samikazi/Desktop/GithubProject/dataset"

# Set image parameters
img_height, img_width = 128, 128

# Helper function to load and preprocess images
def load_images_from_directory(directory, target_size):
    images = []
    labels = []
    class_names = sorted(os.listdir(directory))
    
    for class_index, class_name in enumerate(class_names):
        class_path = os.path.join(directory, class_name)
        
        if os.path.isdir(class_path):
            class_images = []
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)
                try:
                    img = load_img(img_path, target_size=target_size)
                    img_array = img_to_array(img)
                    img_array = img_array.flatten()  # Flatten the 2D image to a 1D array
                    class_images.append(img_array)
                except Exception as e:
                    print(f"Could not load image {img_path}: {e}")
            
            # Add class images if it has at least two samples
            if len(class_images) >= 2:
                images.extend(class_images)
                labels.extend([class_index] * len(class_images))
            else:
                print(f"Skipping class '{class_name}' due to insufficient images.")
                    
    return np.array(images), np.array(labels), class_names

# Load and preprocess images
images, labels, class_names = load_images_from_directory(data_dir, target_size=(img_height, img_width))

# Check class distribution after filtering
class_counts = Counter(labels)
print("Class distribution after filtering:", class_counts)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

# Initialize and train a Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions and calculate accuracy on the validation set
y_pred = rf_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Save the trained model
joblib.dump(rf_model, "animal_classifier_rf_model.pkl")
print("Model saved as 'animal_classifier_rf_model.pkl'")


Skipping class 'cat' due to insufficient images.
Class distribution after filtering: Counter({2: 181, 5: 176, 4: 169, 3: 3})
Validation Accuracy: 88.68%
Model saved as 'animal_classifier_rf_model.pkl'


In [5]:
import os
from icrawler.builtin import GoogleImageCrawler
import threading
import time

# Create dataset folders
def create_folders(main_folder, classes):
    os.makedirs(main_folder, exist_ok=True)
    for class_name in classes:
        os.makedirs(os.path.join(main_folder, class_name), exist_ok=True)
    print(f"Folders created for classes: {', '.join(classes)}")

# Download images for a single class with retry logic
def download_images_for_class(class_name, limit, main_folder):
    output_dir = os.path.join(main_folder, class_name)
    max_attempts = 5
    downloaded = 0

    for attempt in range(max_attempts):
        try:
            print(f"[{class_name}] Attempt {attempt + 1}: Downloading images...")
            crawler = GoogleImageCrawler(
                feeder_threads=1,
                parser_threads=2,
                downloader_threads=4,
                storage={'root_dir': output_dir}
            )
            crawler.crawl(
                keyword=class_name,
                max_num=limit - downloaded,
                min_size=(200, 200),
                max_size=None,
                file_idx_offset='auto'
            )
            
            downloaded = len([f for f in os.listdir(output_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
            print(f"[{class_name}] {downloaded}/{limit} images downloaded.")

            if downloaded >= limit:
                break

        except Exception as e:
            print(f"[{class_name}] Error during attempt {attempt + 1}: {e}")
            time.sleep(2)  # Wait before retrying

    if downloaded < limit:
        print(f"[{class_name}] Warning: Only {downloaded}/{limit} images downloaded.")

# Main function to manage the download process
def main():
    # Define classes and dataset folder
    main_folder = "./dataset"
    classes = ['dog', 'cow', 'cat', 'lamb', 'zebra']
    images_per_class = 100

    # Create folders for dataset
    create_folders(main_folder, classes)

    # Use threads to download images concurrently
    threads = []
    for class_name in classes:
        thread = threading.Thread(target=download_images_for_class, args=(class_name, images_per_class, main_folder))
        threads.append(thread)
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    print("Image downloading completed!")

if __name__ == "__main__":
    main()


2024-11-25 16:30:11,247 - INFO - icrawler.crawler - start crawling...
2024-11-25 16:30:11,249 - INFO - icrawler.crawler - start crawling...
2024-11-25 16:30:11,250 - INFO - icrawler.crawler - starting 1 feeder threads...
2024-11-25 16:30:11,250 - INFO - icrawler.crawler - start crawling...
2024-11-25 16:30:11,252 - INFO - icrawler.crawler - start crawling...
2024-11-25 16:30:11,253 - INFO - icrawler.crawler - start crawling...
2024-11-25 16:30:11,253 - INFO - icrawler.crawler - starting 1 feeder threads...
2024-11-25 16:30:11,254 - INFO - feeder - thread feeder-001 exit
2024-11-25 16:30:11,254 - INFO - icrawler.crawler - starting 2 parser threads...
2024-11-25 16:30:11,254 - INFO - icrawler.crawler - starting 1 feeder threads...
2024-11-25 16:30:11,255 - INFO - icrawler.crawler - starting 1 feeder threads...
2024-11-25 16:30:11,255 - INFO - icrawler.crawler - starting 1 feeder threads...
2024-11-25 16:30:11,257 - INFO - feeder - thread feeder-001 exit
2024-11-25 16:30:11,257 - INFO - i

Folders created for classes: dog, cow, cat, lamb, zebra
[dog] Attempt 1: Downloading images...
[cow] Attempt 1: Downloading images...
[cat] Attempt 1: Downloading images...
[lamb] Attempt 1: Downloading images...
[zebra] Attempt 1: Downloading images...


2024-11-25 16:30:12,637 - INFO - parser - parsing result page https://www.google.com/search?q=dog&ijn=0&start=0&tbs=&tbm=isch
2024-11-25 16:30:12,792 - INFO - parser - parsing result page https://www.google.com/search?q=zebra&ijn=0&start=0&tbs=&tbm=isch
2024-11-25 16:30:12,918 - INFO - parser - parsing result page https://www.google.com/search?q=cow&ijn=0&start=0&tbs=&tbm=isch
2024-11-25 16:30:13,031 - INFO - downloader - image #1	https://cdn.mos.cms.futurecdn.net/HjFE8NKWuCmgfHCcndJ3rK-1200-80.jpg
2024-11-25 16:30:13,114 - INFO - downloader - image #1	https://cdn.britannica.com/23/523-050-0C120420/cow-Holstein-Friesian.jpg
2024-11-25 16:30:13,148 - INFO - parser - parsing result page https://www.google.com/search?q=lamb&ijn=0&start=0&tbs=&tbm=isch
2024-11-25 16:30:13,207 - INFO - downloader - image #2	https://cdn.britannica.com/55/174255-050-526314B6/brown-Guernsey-cow.jpg
2024-11-25 16:30:13,265 - INFO - parser - no more page urls for thread parser-002 to parse
2024-11-25 16:30:13,26

[dog] 1/100 images downloaded.
[dog] Attempt 2: Downloading images...


2024-11-25 16:30:18,755 - INFO - downloader - image #14	https://cdn.britannica.com/53/157153-050-E5582B5A/Holstein-cow.jpg
2024-11-25 16:30:18,986 - INFO - downloader - image #4	https://static.wikia.nocookie.net/my-first-encyclopedia/images/6/60/Equus_quagga.jpg
2024-11-25 16:30:19,105 - INFO - downloader - image #15	https://a-z-animals.com/media/2022/10/Fleckvieh-Cattle-2-1024x683.jpg
2024-11-25 16:30:19,204 - INFO - downloader - image #5	https://i.natgeofe.com/n/14dd7036-78ee-46f4-b620-d5eaf7cebda7/zebra-akagera.jpg
2024-11-25 16:30:19,339 - INFO - downloader - image #7	https://images.getrecipekit.com/v1615995124_RedRubbedBabyLambChopsPg101_xyzuwo.jpg
2024-11-25 16:30:19,527 - INFO - downloader - image #8	https://images.squarespace-cdn.com/content/v1/61c2724e42bafd1109690e02/548b66da-5899-476c-aebf-22c5423eff2e/Lamb+with+tail+-+end+live+lamb+cutting.jpg
2024-11-25 16:30:19,532 - INFO - downloader - no more download task for thread downloader-001
2024-11-25 16:30:19,534 - INFO - downl

[cat] 1/100 images downloaded.
[cat] Attempt 2: Downloading images...


2024-11-25 16:30:21,765 - INFO - downloader - image #8	https://bsmedia.business-standard.com/_media/bs/img/article/2019-02/21/full/1550730493-8318.jpg
2024-11-25 16:30:22,063 - INFO - downloader - image #9	https://u4d2z7k9.rocketcdn.me/wp-content/uploads/2024/01/Untitled-683-x-1024-px-65-1.jpg
2024-11-25 16:30:22,184 - INFO - parser - no more page urls for thread parser-001 to parse
2024-11-25 16:30:22,187 - INFO - parser - thread parser-001 exit
2024-11-25 16:30:22,271 - INFO - downloader - image #10	https://a.storyblok.com/f/233702/5568x3712/c6eb9f21f1/zebra.jpg
2024-11-25 16:30:23,094 - INFO - downloader - image #10	https://sustainablefoodtrust.org/wp-content/uploads/2022/08/Ewes-and-Lambs.jpg
2024-11-25 16:30:23,096 - INFO - downloader - image #17	https://d147a5vd7kzml6.cloudfront.net/img/cowsignals_com/2614/2560x1440/resize:fixed/cowsignals_2560px.jpg
2024-11-25 16:30:23,310 - INFO - downloader - no more download task for thread downloader-002
2024-11-25 16:30:23,311 - INFO - down

[dog] 1/100 images downloaded.
[dog] Attempt 3: Downloading images...


2024-11-25 16:30:27,557 - INFO - downloader - image #18	https://images.ctfassets.net/ww1ie0z745y7/2s5xmDvVnpouRwhibYwD6Z/9d00c04b05e1be0055693eccf8eb6b51/daniel-sandvik-8tcYSwf-RPw-unsplash.jpg
2024-11-25 16:30:27,562 - INFO - downloader - image #19	https://images.immediate.co.uk/production/volatile/sites/30/2011/02/roast-rack-of-lamb-bb2419b.jpg
2024-11-25 16:30:27,737 - INFO - downloader - image #20	https://www.aussiebeefandlamb.co.uk/siteassets/names-for-lamb.jpg
2024-11-25 16:30:28,497 - INFO - downloader - image #21	http://www.vff.org.au/wp-content/uploads/2018/03/vff-blog-images-sheep-and-lamb-2.jpg
2024-11-25 16:30:28,814 - INFO - downloader - no more download task for thread downloader-001
2024-11-25 16:30:28,816 - INFO - downloader - thread downloader-001 exit
2024-11-25 16:30:28,841 - INFO - downloader - image #22	https://us.gozney.com/cdn/shop/articles/Wood-fired_Rack_of_Lamb_Brad_Carter3.jpg
2024-11-25 16:30:28,956 - INFO - downloader - image #23	https://images.squarespace-

[cat] 1/100 images downloaded.
[cat] Attempt 3: Downloading images...


2024-11-25 16:30:31,621 - INFO - downloader - image #22	https://clarklandfarm.com/img/animals/cows_6.jpg
2024-11-25 16:30:31,657 - INFO - downloader - image #29	http://beeinspiredgoods.com/cdn/shop/articles/20230318073730-lamb-rack-sharing-683x1024.jpg
2024-11-25 16:30:31,714 - ERROR - downloader - Response status code 404, file https://upload.wikimedia.org/wikipedia/commons/thumb/6/69/Equus_zebra_-_Disney%27s_Animal_Kingdom_Lodge%2C_Orlando%2C_Florida%2C_USA_-_20100119.jpg
2024-11-25 16:30:31,753 - INFO - downloader - image #20	https://www.treehugger.com/thmb/qFhPReYPPaVgTtHBOthYeMJVeZ0=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/GettyImages-1043597638-49acd69677d7442588c1d8930d298a59.jpg
2024-11-25 16:30:31,855 - INFO - parser - no more page urls for thread parser-001 to parse
2024-11-25 16:30:31,856 - INFO - parser - thread parser-001 exit
2024-11-25 16:30:32,032 - ERROR - downloader - Response status code 400, file https://media.gettyimages.com/id/498178753/photo/lam

[dog] 2/100 images downloaded.
[dog] Attempt 4: Downloading images...


2024-11-25 16:30:38,281 - INFO - downloader - image #41	https://static.wixstatic.com/media/5fee1b_d1f5825a1db3417292f4912a127a42ea~mv2_d_3072_2304_s_2.jpg
2024-11-25 16:30:38,495 - INFO - downloader - no more download task for thread downloader-001
2024-11-25 16:30:38,497 - INFO - downloader - thread downloader-001 exit
2024-11-25 16:30:38,551 - INFO - downloader - image #42	https://img.freepik.com/free-vector/hand-drawn-lamb-silhouette-set_23-2150626512.jpg
2024-11-25 16:30:38,821 - INFO - downloader - image #24	https://images.takeshape.io/86ce9525-f5f2-4e97-81ba-54e8ce933da7/dev/2710650f-cd0f-4e10-95d9-f1fb7af85f92/Lonely%20beautiful%20hartmann's%20mountain%20zebra%20stands%20in%20its%20natural%20habitat.%20Wildlife%20shutterstock_1789778813%20(1).jpg
2024-11-25 16:30:38,826 - INFO - downloader - image #25	https://cdn.animalsaustralia.org/wp-content/uploads/2021/11/25105031/CowFriendships.jpg
2024-11-25 16:30:38,839 - INFO - downloader - image #43	https://www.ldoceonline.com/media/en

[cat] 1/100 images downloaded.
[cat] Attempt 4: Downloading images...


2024-11-25 16:30:39,609 - INFO - downloader - image #27	https://drinkmilkinglassbottles.com/wp-content/uploads/2017/01/5-Fun-Facts-About-Cows-Debunking-Common-Myths.jpg
2024-11-25 16:30:39,792 - INFO - parser - parsing result page https://www.google.com/search?q=dog&ijn=0&start=0&tbs=&tbm=isch
2024-11-25 16:30:40,005 - INFO - downloader - image #28	https://www.mittaldairyfarms.com/images/cowcare/cowcare.png
2024-11-25 16:30:40,324 - INFO - downloader - image #45	https://www.simplyrecipes.com/thmb/83ck-6L5oPbNWPxfpAKEDhO5xdc=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/__opt__aboutcom__coeus__resources__content_migration__simply_recipes__uploads__2013__02__Rosemary-Lamb-Chops-LEAD-1-dce9734297f94d379b198ce36055ff8f.jpg
2024-11-25 16:30:40,411 - INFO - downloader - image #29	https://www.idausa.org/assets/components/phpthumbof/cache/blogmain-cows-0621-source-ida.8eaef410aa188631bc63e21150c2f3af.jpg
2024-11-25 16:30:40,496 - INFO - downloader - image #26	https://images.square

[dog] 3/100 images downloaded.
[dog] Attempt 5: Downloading images...


2024-11-25 16:30:47,696 - INFO - downloader - image #63	https://seedscereals.co.nz/wp-content/uploads/2017/08/Lamb-Jacket-1.jpg
2024-11-25 16:30:47,748 - INFO - downloader - image #34	https://www.jamboreetours.com/wp-content/uploads/2020/10/african-zebra.jpg
2024-11-25 16:30:48,620 - INFO - downloader - image #35	https://images.desenio.com/zoom/2400_1.jpg
2024-11-25 16:30:48,967 - INFO - parser - no more page urls for thread parser-001 to parse
2024-11-25 16:30:48,969 - INFO - parser - thread parser-001 exit
2024-11-25 16:30:49,084 - INFO - downloader - image #64	https://krollskorner.com/wp-content/uploads/2019/05/lamb1-scaled.jpg
2024-11-25 16:30:49,234 - INFO - downloader - image #36	https://www.lpzoo.org/wp-content/uploads/2022/12/0142_Banner-143.jpg
2024-11-25 16:30:49,410 - INFO - downloader - no more download task for thread downloader-003
2024-11-25 16:30:49,411 - INFO - downloader - no more download task for thread downloader-004
2024-11-25 16:30:49,412 - INFO - downloader - no

[cat] 1/100 images downloaded.
[cat] Attempt 5: Downloading images...


2024-11-25 16:30:49,833 - INFO - downloader - image #65	https://ausorganicmeatco.com.au/cdn/shop/files/Lamb_Cuts_1200x.jpg
2024-11-25 16:30:50,143 - ERROR - downloader - Response status code 403, file https://anexa.co.nz/wp-content/uploads/2022/07/Down-cow-bag-over-ribs-1-1200-1-1024x768.jpg
2024-11-25 16:30:50,176 - INFO - downloader - image #66	https://hips.hearstapps.com/hmg-prod/images/rack-of-lamb-index-65d8d5c8dffd9.jpg
2024-11-25 16:30:50,192 - INFO - downloader - image #52	https://www.ciwf.org.uk/media/3836620/Dairy-cow.jpg
2024-11-25 16:30:50,336 - INFO - downloader - image #67	https://media.post.rvohealth.io/wp-content/uploads/sites/3/2023/12/Lamb_cholesterol_GettyImages585144810_Header-1024x575.jpg
2024-11-25 16:30:50,455 - INFO - downloader - image #53	https://i.pinimg.com/originals/d3/81/9f/d3819f2499c3ecb75e3b270f2953307d.jpg
2024-11-25 16:30:50,578 - INFO - downloader - image #54	https://animalequality.in/app/uploads/2022/06/Two-cows-love-bond.jpg
2024-11-25 16:30:50,998

[cat] 1/100 images downloaded.


2024-11-25 16:30:56,678 - INFO - downloader - image #74	https://images.unsplash.com/photo-1448227700746-d8eab5a1b9d7?fm=jpg
2024-11-25 16:30:56,812 - INFO - downloader - image #42	https://www.science.org/do/10.1126/science.aaa6333/abs/sn-zebrah.jpg
2024-11-25 16:30:56,814 - ERROR - downloader - Response status code 400, file https://media.istockphoto.com/id/1428640160/photo/cow-isolated-on-white-standing-upright-black-and-white-full-length-and-front-view-and-copy.jpg
2024-11-25 16:30:56,858 - INFO - downloader - image #67	https://www.daysoftheyear.com/wp-content/uploads/cow-appreciation-day-1.jpg
2024-11-25 16:30:56,898 - INFO - downloader - image #43	https://destinationuganda.com/wp-content/uploads/2020/07/burchells-zebra-uganda-004.jpg
2024-11-25 16:30:56,915 - INFO - downloader - image #68	https://provenir.com.au/wp-content/uploads/2024/03/image1-1024x725.jpg
2024-11-25 16:30:57,090 - INFO - downloader - no more download task for thread downloader-002
2024-11-25 16:30:57,091 - INFO 

[dog] 3/100 images downloaded.


2024-11-25 16:30:57,789 - INFO - downloader - no more download task for thread downloader-004
2024-11-25 16:30:57,789 - INFO - downloader - thread downloader-004 exit
2024-11-25 16:30:57,838 - INFO - downloader - image #69	https://www.homebiogas.com/wp-content/uploads/2023/09/shutterstock_1841069923-scaled-1.jpg
2024-11-25 16:30:58,058 - INFO - downloader - image #45	https://www.zambiatourism.com/media/dreamstime_xxl_25848303.jpg
2024-11-25 16:30:58,158 - INFO - downloader - image #70	https://agvance.co.nz/wp-content/uploads/2024/05/DSCN4812-1024x768.jpg
2024-11-25 16:30:58,441 - INFO - downloader - image #46	https://i.ytimg.com/vi/kWxnadQI5Qw/hq720.jpg
2024-11-25 16:30:58,844 - INFO - downloader - image #47	https://www.folly-farm.co.uk/wp-content/uploads/2018/03/zebra-foal-7.jpg
2024-11-25 16:31:00,436 - INFO - downloader - image #48	https://animaldiversity.org/collections/contributors/Grzimek_mammals/Equiidae/Equus_zebra/medium.jpg
2024-11-25 16:31:00,532 - INFO - downloader - image 

[lamb] 169/100 images downloaded.


2024-11-25 16:31:02,845 - INFO - downloader - no more download task for thread downloader-003
2024-11-25 16:31:02,846 - INFO - downloader - thread downloader-003 exit
2024-11-25 16:31:02,938 - INFO - downloader - image #54	https://i.redd.it/x8h3agfhipt81.jpg
2024-11-25 16:31:03,166 - INFO - downloader - no more download task for thread downloader-001
2024-11-25 16:31:03,168 - INFO - downloader - thread downloader-001 exit
2024-11-25 16:31:03,611 - INFO - downloader - image #55	https://images.rzss.org.uk/media/Edinburgh_Zoo/EZ_animals/Grevy's_zebra/grevy's%20zebra%204.jpg
2024-11-25 16:31:03,747 - INFO - downloader - image #56	https://static01.nyt.com/images/2024/05/04/multimedia/04xp-zebra-kqfg/04xp-zebra-kqfg-mediumSquareAt3X.jpg
2024-11-25 16:31:04,177 - INFO - downloader - image #57	https://www.awf.org/sites/default/files/styles/species_image/public/2020-04/Website_SpeciesPage_GrevysZebra02_Challenges.jpg
2024-11-25 16:31:04,578 - INFO - downloader - image #58	https://images.twinkl.

[cow] 181/100 images downloaded.


2024-11-25 16:31:06,773 - INFO - downloader - image #65	https://media.greenmatters.com/brand-img/dGyFOJJmN/0x0/international-zebra-day6-1611606399511.jpg
2024-11-25 16:31:07,180 - INFO - downloader - image #66	https://www.krugerpark.co.za/images/zebra-herd-roger-delaharpe-590x390.jpg
2024-11-25 16:31:07,236 - INFO - downloader - image #67	https://img.freepik.com/free-vector/cute-zebra-cartoon-animal-character_1308-154268.jpg
2024-11-25 16:31:07,385 - INFO - downloader - image #68	https://wildtimes.buildingourzoo.com/wp-content/uploads/2022/01/Copy-of-Copy-of-Zebra-mom-_-baby.jpg
2024-11-25 16:31:08,976 - INFO - downloader - image #69	https://cdn.britannica.com/85/219885-050-92BF3722/two-zebras-Serengeti-National-Park-Tanzania.jpg
2024-11-25 16:31:09,932 - ERROR - downloader - Exception caught when downloading file https://www.kansascity.com/latest-news/brahna/picture291929150/alternates/LANDSCAPE_1140/Screenshot%202024-09-04%20115435.png, error: HTTPSConnectionPool(host='www.kansascity

[zebra] 176/100 images downloaded.
Image downloading completed!
