<a href="https://colab.research.google.com/github/kebora/machine-learning/blob/main/sort_code-hagrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import os
import shutil
from collections import defaultdict

# Set the path to your images folder in Google Drive
images_folder = '/content/drive/MyDrive/hagrid_seven'

# Set the path for the new output folder
output_folder = '/content/drive/MyDrive/hagrid_sorted'
os.makedirs(output_folder, exist_ok=True)

# Target the 'peace' category and set the limit
category_to_target = 'one'
limit_per_category = 300

# Create a counter for the category
category_count = defaultdict(int)

# List all images in the images folder
image_files = os.listdir(images_folder)

# Check if images are detected
print(f"Number of files detected: {len(image_files)}")
print(f"First 10 files: {image_files[:10]}")  # Print the first 10 files to check

# Function to check if 'peace' is in the filename
def is_peace_category(filename):
    return category_to_target in filename

# Loop over the images and copy only those with 'peace' in the filename
for image_file in image_files:
    # Check if the current image belongs to the 'peace' category
    if is_peace_category(image_file):
        category = category_to_target
        print(f"Processing file: {image_file}, detected category: {category}")

        # If we haven't reached the limit for the 'peace' category
        if category_count[category] < limit_per_category:
            # Create the 'peace' folder if it doesn't exist
            category_folder = os.path.join(output_folder, category)
            os.makedirs(category_folder, exist_ok=True)

            # Increment the category count for the new filename (e.g., 1.jpg, 2.jpg, etc.)
            category_count[category] += 1
            new_filename = f"{category_count[category]}.jpg"

            # Copy the image to the new 'peace' folder with the new filename
            src_path = os.path.join(images_folder, image_file)
            dest_path = os.path.join(category_folder, new_filename)

            # Check if paths are correct before copying
            print(f"Copying from {src_path} to {dest_path}")

            # Copy the file
            shutil.copyfile(src_path, dest_path)

        # Break the loop if we have reached the limit
        if category_count[category] >= limit_per_category:
            break

print(f"Image sorting and renaming completed for the '{category_to_target}' category. Limited to {limit_per_category} images.")


Number of files detected: 12566
First 10 files: ['70bd5329-ce7c-4a18-9f08-e80834651923_stop_1.jpg', '710bc618-a0dc-4c62-8a2e-0f2e802f21b0_stop_0.jpg', '71777d35-1869-4eb1-b708-66c71dfe99bf_stop_0.jpg', '71f55a8f-621f-4d5a-9b1c-5d0cd0c2769f_stop_0.jpg', '72544ba3-bc2a-431e-a0f1-fc037236b3a5_stop_0.jpg', '72808f99-83e3-400b-aa51-5f0bb74e059d_stop_0.jpg', '73926c37-38ea-4a42-97a7-6ab24da6f11b_stop_0.jpg', '7447a8e3-39a1-44e4-8623-9a90ba41874c_stop_0.jpg', '74aca726-ef32-4750-9059-3602904929f3_stop_0.jpg', '74c777b4-1710-4027-92ef-2a02fd7f6887_stop_0.jpg']
Processing file: 242d24b7-6d4c-42db-a4af-686bdcc9f6d4_one_0.jpg, detected category: one
Copying from /content/drive/MyDrive/hagrid_seven/242d24b7-6d4c-42db-a4af-686bdcc9f6d4_one_0.jpg to /content/drive/MyDrive/hagrid_sorted/one/1.jpg
Processing file: 2458259d-fd31-4a20-b3ed-f68f9ebbc98a_one_0.jpg, detected category: one
Copying from /content/drive/MyDrive/hagrid_seven/2458259d-fd31-4a20-b3ed-f68f9ebbc98a_one_0.jpg to /content/drive/MyDri