In [22]:
import os

# Print Images Count per Class in sampled data directory

train_data_path = "../sampled_data"

# Iterate through each subfolder in train_data
for folder_name in os.listdir(train_data_path):
    folder_path = os.path.join(train_data_path, folder_name)
    if os.path.isdir(folder_path):
        image_count = len(
            [
                file
                for file in os.listdir(folder_path)
                if file.endswith((".png", ".jpg", ".jpeg"))
            ]
        )
        print(f"{folder_name}: {image_count} images")

rhodesian_ridgeback: 294 images
french_bulldog: 408 images
english_bulldog: 248 images
labrador_retriever: 570 images
pug: 500 images
border_collie: 420 images
bernese_mountain_dog: 508 images
australien_shepherd: 97 images
maltese: 606 images
havanese: 219 images
chihuahua: 236 images
doberman_pinscher: 421 images
rottweiler: 331 images
golden_retriever: 521 images
beagle: 551 images


In [13]:
import os
import shutil
import random

# Generate Train, Val, Test Split for Sampled Data in Dataset Directory


# Define paths
train_data_path = "../sampled_data"
dataset_path = "../dataset"

# Create dataset directories
os.makedirs(os.path.join(dataset_path, "train"), exist_ok=True)
os.makedirs(os.path.join(dataset_path, "val"), exist_ok=True)
os.makedirs(os.path.join(dataset_path, "test"), exist_ok=True)

# Iterate through each subfolder in train_data
for folder_name in os.listdir(train_data_path):
    folder_path = os.path.join(train_data_path, folder_name)
    if os.path.isdir(folder_path):
        # Create subdirectories in dataset/train, dataset/val, dataset/test
        os.makedirs(os.path.join(dataset_path, "train", folder_name), exist_ok=True)
        os.makedirs(os.path.join(dataset_path, "val", folder_name), exist_ok=True)
        os.makedirs(os.path.join(dataset_path, "test", folder_name), exist_ok=True)

        # Get all image files in the folder
        image_files = [
            file
            for file in os.listdir(folder_path)
            if file.endswith((".png", ".jpg", ".jpeg"))
        ]

        # Shuffle the images
        random.shuffle(image_files)

        # Calculate split sizes
        test_size = min(25, len(image_files))
        val_size = min(25, len(image_files) - test_size)
        train_size = len(image_files) - test_size - val_size

        # Split the images
        test_images = image_files[:test_size]
        val_images = image_files[test_size : test_size + val_size]
        train_images = image_files[test_size + val_size :]

        # Move images to respective directories
        for image in test_images:
            shutil.copy(
                os.path.join(folder_path, image),
                os.path.join(dataset_path, "test", folder_name, image),
            )

        for image in val_images:
            shutil.copy(
                os.path.join(folder_path, image),
                os.path.join(dataset_path, "val", folder_name, image),
            )

        for image in train_images:
            shutil.copy(
                os.path.join(folder_path, image),
                os.path.join(dataset_path, "train", folder_name, image),
            )

print("Images have been split into train, val, and test sets.")

Images have been split into train, val, and test sets.


In [23]:
for class_name in os.listdir("../dataset/train"):
    class_folder = os.path.join("../dataset/train", class_name)
    if os.path.isdir(class_folder):
        image_count = len(
            [
                file
                for file in os.listdir(class_folder)
                if file.endswith((".png", ".jpg", ".jpeg"))
            ]
        )
        print(f"{class_name}: {image_count} images")

rhodesian_ridgeback: 1000 images
french_bulldog: 1000 images
english_bulldog: 1000 images
labrador_retriever: 1000 images
pug: 1000 images
border_collie: 1000 images
bernese_mountain_dog: 1000 images
australien_shepherd: 1000 images
maltese: 1000 images
havanese: 1000 images
chihuahua: 1000 images
doberman_pinscher: 1000 images
rottweiler: 1000 images
golden_retriever: 1000 images
beagle: 1000 images


In [26]:
import cv2

# Path to the sampled_data folder
sampled_data_path = "../sampled_data"

# Iterate through each subfolder in sampled_data
for folder_name in os.listdir(sampled_data_path):
    folder_path = os.path.join(sampled_data_path, folder_name)
    if os.path.isdir(folder_path):
        for image_name in os.listdir(folder_path):
            image_path = os.path.join(folder_path, image_name)
            if image_name.endswith((".png", ".jpg", ".jpeg")):
                image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
                print(f"Image shape for {image_path}: {image.shape}")
                if image is None:
                    print(f"Failed to load image: {image_path}")
                if (
                    len(image.shape) == 2 or image.shape[2] == 1
                ):  # Check if single channel
                    print(f"Single-channel image: {image_path}")

Image shape for ../sampled_data/rhodesian_ridgeback/n02087394_5552.jpg: (333, 500, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/093.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/n02087394_1722.jpg: (333, 500, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/096.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/029.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/n02087394_9698.jpg: (768, 853, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/Image_14.jpg: (779, 1000, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/11103.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/125.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/n02087394_4147.jpg: (500, 375, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/n02087394_8742.jpg: (461, 500, 3)
Image shape for ../sampled_data/rhodesian_ridgeback/110.jpg: (224, 224, 3)
Image shape for ../sampled_data/rhode

In [29]:
# Path to the dataset folder
dataset_path = "../dataset"

# Set to store unique color channel configurations
color_channel_set = set()

# Iterate through each subfolder in dataset
for split_name in os.listdir(dataset_path):
    split_path = os.path.join(dataset_path, split_name)
    if os.path.isdir(split_path):
        for class_name in os.listdir(split_path):
            class_folder = os.path.join(split_path, class_name)
            if os.path.isdir(class_folder):
                for image_name in os.listdir(class_folder):
                    image_path = os.path.join(class_folder, image_name)
                    if image_name.endswith((".png", ".jpg", ".jpeg")):
                        image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
                        if image is None:
                            print(f"Failed to load image: {image_path}")
                        else:
                            color_channel_set.add(image.shape)

# Print unique color channel configurations
print("Unique color channel configurations:", color_channel_set)

Unique color channel configurations: {(599, 683, 3), (800, 599, 3), (372, 540, 3), (600, 451, 3), (390, 500, 3), (1271, 1695, 3), (1024, 768, 3), (1102, 1024, 3), (662, 920, 3), (480, 640, 3), (415, 500, 3), (500, 337, 3), (640, 1140, 3), (378, 500, 3), (1123, 1500, 3), (170, 202, 3), (640, 387, 3), (525, 700, 3), (898, 1016, 3), (684, 744, 3), (1011, 1000, 3), (299, 500, 3), (892, 1024, 3), (1500, 1046, 3), (1080, 810, 3), (928, 1392, 3), (204, 175, 3), (1813, 1200, 3), (500, 446, 3), (500, 435, 3), (800, 1200, 4), (467, 432, 3), (184, 148, 3), (420, 280, 3), (1500, 1270, 3), (826, 1251, 3), (709, 600, 3), (1600, 1200, 3), (830, 552, 3), (1659, 2348, 3), (780, 1170, 4), (1088, 1116, 3), (1024, 934, 3), (1164, 1551, 3), (452, 631, 3), (251, 250, 3), (694, 1039, 3), (1227, 816, 3), (319, 320, 3), (430, 500, 3), (282, 500, 3), (428, 439, 3), (593, 600, 3), (765, 709, 3), (140, 140, 3), (943, 1000, 3), (899, 899, 3), (203, 186, 3), (836, 736, 3), (4072, 2712, 3), (780, 712, 3), (224, 169,