In [1]:
import os
import shutil
import random

def process_cat_images(root_dir='cat/images', min_images=2500):
    """Delete every breed folder that holds fewer than ``min_images`` files.

    WARNING: destructive. Folders are removed with ``shutil.rmtree`` and
    cannot be recovered, so run this against a copy of the dataset.

    Args:
        root_dir: Directory whose immediate sub-directories are treated as
            breed folders. Plain files directly inside it are ignored.
        min_images: Minimum number of regular files a breed folder must
            contain in order to be kept. Defaults to 2500.

    Returns:
        None. One "Deleted folder: ..." line is printed per removed folder.
    """
    for breed_folder in os.listdir(root_dir):
        breed_path = os.path.join(root_dir, breed_folder)
        if not os.path.isdir(breed_path):
            continue

        # Count regular files only; nested sub-directories are not images.
        image_count = sum(
            1
            for name in os.listdir(breed_path)
            if os.path.isfile(os.path.join(breed_path, name))
        )

        if image_count < min_images:
            shutil.rmtree(breed_path)
            print(f"Deleted folder: {breed_path}")

# Entry point: prune the breed folders under the default root ('cat/images').
# Destructive: process_cat_images removes folders with shutil.rmtree.
if __name__ == "__main__":
    process_cat_images()


Deleted folder: cat/images\Abyssinian
Deleted folder: cat/images\American Bobtail
Deleted folder: cat/images\American Curl
Deleted folder: cat/images\American Wirehair
Deleted folder: cat/images\Applehead Siamese
Deleted folder: cat/images\Balinese
Deleted folder: cat/images\Bengal
Deleted folder: cat/images\Birman
Deleted folder: cat/images\Bombay
Deleted folder: cat/images\British Shorthair
Deleted folder: cat/images\Burmese
Deleted folder: cat/images\Burmilla
Deleted folder: cat/images\Canadian Hairless
Deleted folder: cat/images\Chartreux
Deleted folder: cat/images\Chausie
Deleted folder: cat/images\Chinchilla
Deleted folder: cat/images\Cornish Rex
Deleted folder: cat/images\Cymric
Deleted folder: cat/images\Devon Rex
Deleted folder: cat/images\Egyptian Mau
Deleted folder: cat/images\Exotic Shorthair
Deleted folder: cat/images\Extra-Toes Cat - Hemingway Polydactyl
Deleted folder: cat/images\Havana
Deleted folder: cat/images\Himalayan
Deleted folder: cat/images\Japanese Bobtail
Dele

In [2]:
import os
import cv2
import shutil

def detect_cat_faces(image_path, face_cascade, output_path):
    """Crop every detected cat face out of one image and save the crops.

    WARNING: if the image is readable but no face is detected, the source
    file at ``image_path`` is deleted.

    Args:
        image_path: Path of the image file to analyse.
        face_cascade: Object exposing ``detectMultiScale`` (the caller in this
            file passes a ``cv2.CascadeClassifier``).
        output_path: Existing directory that receives the cropped faces,
            named ``<image stem>_face_<index>.jpg``.

    Returns:
        True if at least one face was detected, otherwise False (including
        the case where the file could not be read as an image).
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports unreadable / non-image files by returning None
        # rather than raising. Skip them, and leave the file in place because
        # "not an image" is not the same as "image without a face".
        print(f"Skipped {image_path} because it could not be read as an image.")
        return False

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) == 0:
        os.remove(image_path)
        print(f"Deleted {image_path} because no face was detected.")
        return False

    # splitext strips only the final extension, so "cat.v2.jpg" keeps the stem
    # "cat.v2" and cannot collide with crops from "cat.v1.jpg".
    stem = os.path.splitext(os.path.basename(image_path))[0]
    for i, (x, y, w, h) in enumerate(faces):
        face_img = image[y:y+h, x:x+w]
        face_file_path = os.path.join(output_path, f"{stem}_face_{i}.jpg")
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(face_file_path, face_img):
            print(f"Failed to write {face_file_path}")

    return True

def process_cat_images_with_face_detection(root_dir='cat/images', output_dir='cat_faces', cascade_path='haarcascade_frontalcatface.xml'):
    """Run cat-face extraction over every breed folder under ``root_dir``.

    For each sub-directory of ``root_dir`` a same-named directory is created
    under ``output_dir`` and every regular file inside the breed folder is
    passed to ``detect_cat_faces``. Be aware that ``detect_cat_faces`` deletes
    source images in which no face is found.

    Args:
        root_dir: Directory containing one sub-directory per breed.
        output_dir: Directory that receives the cropped faces; created if
            it does not exist.
        cascade_path: Path to the cascade XML file used for detection.

    Raises:
        OSError: If no classifier could be loaded from ``cascade_path``.
    """
    face_cascade = cv2.CascadeClassifier(cascade_path)
    # A missing or invalid cascade file produces an empty classifier instead
    # of an exception; fail here, before any directory is created or any
    # image is touched, with a message that names the offending path.
    if face_cascade.empty():
        raise OSError(f"Could not load cascade classifier from {cascade_path!r}")

    os.makedirs(output_dir, exist_ok=True)

    for breed_folder in os.listdir(root_dir):
        breed_path = os.path.join(root_dir, breed_folder)
        if not os.path.isdir(breed_path):
            continue

        output_breed_path = os.path.join(output_dir, breed_folder)
        os.makedirs(output_breed_path, exist_ok=True)

        for image_file in os.listdir(breed_path):
            image_path = os.path.join(breed_path, image_file)
            if not os.path.isfile(image_path):
                continue
            if detect_cat_faces(image_path, face_cascade, output_breed_path):
                print(f"Processed {image_path}")

# Entry point: run face extraction with the default directories and cascade file.
# Note that detect_cat_faces deletes source images in which no face is found.
if __name__ == "__main__":
    process_cat_images_with_face_detection()


Processed cat/images\American Shorthair\38983619_7310.jpg
Deleted cat/images\American Shorthair\40578169_9399.jpg because no face was detected.
Deleted cat/images\American Shorthair\40652354_8840.jpg because no face was detected.
Processed cat/images\American Shorthair\41193941_9701.jpg
Deleted cat/images\American Shorthair\41320306_9999.jpg because no face was detected.
Processed cat/images\American Shorthair\41320316_9998.jpg
Deleted cat/images\American Shorthair\41327416_9990.jpg because no face was detected.
Processed cat/images\American Shorthair\41327462_9989.jpg
Deleted cat/images\American Shorthair\41327670_9988.jpg because no face was detected.
Deleted cat/images\American Shorthair\41333469_9985.jpg because no face was detected.
Processed cat/images\American Shorthair\41333512_9984.jpg
Processed cat/images\American Shorthair\41336587_9983.jpg
Processed cat/images\American Shorthair\41336596_9982.jpg
Deleted cat/images\American Shorthair\41339518_9980.jpg because no face was de

In [3]:
import os
import shutil
import random

# NOTE: an earlier cell of this notebook defines a different function under
# the same name; executing this cell rebinds ``process_cat_images``.
def process_cat_images(root_dir='./cat_faces/', target_count=1000, seed=None):
    """Balance breed folders so each surviving one holds ``target_count`` files.

    WARNING: destructive. Folders with fewer than ``target_count`` files are
    removed entirely; folders with more are randomly down-sampled by deleting
    the surplus files. Folders with exactly ``target_count`` files are left
    untouched.

    Args:
        root_dir: Directory whose immediate sub-directories are treated as
            breed folders. Plain files directly inside it are ignored.
        target_count: Number of files every kept folder ends up with.
            Defaults to 1000.
        seed: Optional seed for the random selection. When given, the same
            seed always keeps the same files. When None, the global
            ``random`` module state is used.

    Returns:
        None. Progress is reported with one printed line per changed folder.
    """
    rng = random if seed is None else random.Random(seed)

    for breed_folder in os.listdir(root_dir):
        breed_path = os.path.join(root_dir, breed_folder)
        if not os.path.isdir(breed_path):
            continue

        # Sorted so that a given seed selects the same files no matter in
        # which order the operating system lists the directory.
        image_files = sorted(
            name
            for name in os.listdir(breed_path)
            if os.path.isfile(os.path.join(breed_path, name))
        )
        image_count = len(image_files)

        if image_count < target_count:
            shutil.rmtree(breed_path)
            print(f"Deleted folder: {breed_path}")
        elif image_count > target_count:
            # A set makes each membership test O(1) instead of scanning a list.
            images_to_keep = set(rng.sample(image_files, target_count))
            for image_file in image_files:
                if image_file not in images_to_keep:
                    os.remove(os.path.join(breed_path, image_file))
            print(f"Reduced folder: {breed_path} to {target_count} images")

# Entry point: balance the breed folders under the default root ('./cat_faces/').
# Destructive: process_cat_images deletes folders and surplus image files.
if __name__ == "__main__":
    process_cat_images()

Reduced folder: ./cat_faces/American Shorthair to 1000 images
Reduced folder: ./cat_faces/Calico to 1000 images
Reduced folder: ./cat_faces/Dilute Calico to 1000 images
Reduced folder: ./cat_faces/Dilute Tortoiseshell to 1000 images
Reduced folder: ./cat_faces/Domestic Long Hair to 1000 images
Reduced folder: ./cat_faces/Domestic Medium Hair to 1000 images
Reduced folder: ./cat_faces/Domestic Short Hair to 1000 images
Reduced folder: ./cat_faces/Persian to 1000 images
Reduced folder: ./cat_faces/Ragdoll to 1000 images
Reduced folder: ./cat_faces/Siamese to 1000 images
Reduced folder: ./cat_faces/Tabby to 1000 images
Reduced folder: ./cat_faces/Torbie to 1000 images
