# Dataset Utils / Tools

### Count Images in Folder (Including Subfolders)

In [None]:
import os

# Define the directory containing the images
directory = '/kaggle/working/cropped_sweetspot_images'

# Supported image extensions (matched case-insensitively)
image_extensions = ('.png', '.jpg', '.jpeg')


def count_images(root_dir, extensions=image_extensions):
    """Count image files under ``root_dir``, including all subdirectories.

    Args:
        root_dir: Directory to scan recursively.
        extensions: Filename suffixes that identify an image.

    Returns:
        The number of files whose name ends with one of ``extensions``.
        The comparison ignores case, so ``photo.JPG`` is counted as well as
        ``photo.jpg``. A ``root_dir`` that does not exist yields 0, because
        ``os.walk`` produces nothing for it.
    """
    # str.endswith needs a tuple; normalise case once, outside the loop.
    suffixes = tuple(ext.lower() for ext in extensions)
    total = 0
    for _, _, names in os.walk(root_dir):
        total += sum(1 for name in names if name.lower().endswith(suffixes))
    return total


# Count the images in the folder and all of its subfolders
image_count = count_images(directory)

# Print the total count of images
print(f"Total number of images in the folder and subfolders: {image_count}")

### Count Files Whose Name Contains a Substring

In [None]:
import os

# Directory whose image filenames are searched
preprocessed_dir = '/kaggle/working/cropped_muzzle_images'

# Define the substring to search for
substring = 'classified'  # Replace with your specific substring


def count_matching_images(folder, needle, extensions=('.png', '.jpg', '.jpeg')):
    """Count image files directly inside ``folder`` whose name contains ``needle``.

    Only the top level of ``folder`` is inspected; subdirectories are not
    traversed.

    Args:
        folder: Directory to list.
        needle: Substring that must occur in the filename (case-sensitive).
        extensions: Filename suffixes that identify an image; compared
            case-insensitively so ``x.PNG`` matches ``.png``.

    Returns:
        The number of matching filenames, or 0 (after printing a notice)
        when ``folder`` is not an existing directory.
    """
    # os.listdir would raise on a missing path; report it instead of crashing.
    if not os.path.isdir(folder):
        print(f"Directory {folder} does not exist.")
        return 0

    suffixes = tuple(ext.lower() for ext in extensions)
    return sum(
        1
        for name in os.listdir(folder)
        if name.lower().endswith(suffixes) and needle in name
    )


# Count the matching files
count = count_matching_images(preprocessed_dir, substring)

# Print the count
print(f"Number of files containing '{substring}' in the filename: {count}")


### Delete Files in Directory

In [None]:
import os

# Directory containing preprocessed images
preprocessed_dir = '/kaggle/working/preprocessed_sweetspot_images'


def remove_files_in_directory(folder):
    """Delete every file located directly inside ``folder``.

    Subdirectories, and anything inside them, are left untouched. A file
    that cannot be removed is reported and skipped so the remaining files
    are still processed.

    Args:
        folder: Directory whose top-level files are deleted.

    Returns:
        The number of files that were removed. Returns 0 (after printing a
        notice) when ``folder`` is not an existing directory.
    """
    # isdir rather than exists: a path that points at a regular file would
    # pass an exists() check and then make os.listdir raise.
    if not os.path.isdir(folder):
        print(f"Directory {folder} does not exist.")
        return 0

    removed = 0
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        if not os.path.isfile(file_path):
            continue
        try:
            os.remove(file_path)
        except OSError as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Error removing {file_path}: {e}")
        else:
            removed += 1
            print(f"Removed file: {file_path}")
    return removed


# Remove the files; the count is kept in a variable so the notebook cell
# does not echo it as extra output.
removed_count = remove_files_in_directory(preprocessed_dir)


### Get Classes with Multiple Images

In [None]:
import os
from collections import defaultdict

# Define directory for preprocessed images
preprocessed_dir = '/kaggle/working/preprocessed_sweetspot_images'

# Supported image extensions (matched case-insensitively)
image_extensions = ('.png', '.jpg', '.jpeg')

# Zero-based position of the class id among the '_'-separated filename parts
# (for example: clw210021)
CLASS_PART_INDEX = 7


def group_images_by_class(filenames, class_index=CLASS_PART_INDEX,
                          extensions=image_extensions):
    """Group image filenames by the class id embedded in their name.

    Each filename is split on ``'_'`` and the part at ``class_index`` is
    used as the class id. Names that are not images, or that have too few
    parts to contain a class id, are skipped.

    Args:
        filenames: Iterable of filenames (no directory component needed).
        class_index: Zero-based index of the class id among the parts.
        extensions: Filename suffixes that identify an image; compared
            case-insensitively.

    Returns:
        A ``defaultdict(list)`` mapping each class id to the list of
        filenames belonging to it, in the order they were supplied.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    grouped = defaultdict(list)
    for filename in filenames:
        if not filename.lower().endswith(suffixes):
            continue
        parts = filename.split('_')
        # parts[class_index] only exists when there are MORE than
        # class_index parts; a ">=" check would allow an IndexError.
        if len(parts) > class_index:
            grouped[parts[class_index]].append(filename)
    return grouped


# List the directory, tolerating a missing folder instead of crashing
if os.path.isdir(preprocessed_dir):
    image_filenames = os.listdir(preprocessed_dir)
else:
    print(f"Directory {preprocessed_dir} does not exist.")
    image_filenames = []

# Map each class id to the list of image filenames that belong to it
class_image_count = group_images_by_class(image_filenames)

# Get the list of unique classes
unique_classes = list(class_image_count)

# Keep only the classes that have more than one image
classes_with_multiple_images = {
    cls: img_list
    for cls, img_list in class_image_count.items()
    if len(img_list) > 1
}

# Print unique classes
print(f"Unique classes: {unique_classes}")

# Print classes with more than one image
print("Classes with more than one image:")
for cls, images in classes_with_multiple_images.items():
    print(f"Class '{cls}' has {len(images)} images:")
    for image in images:
        print(f"    {image}")
