In [2]:
import os
import shutil
import random

# Root folder of the study. The raw string keeps the Windows backslashes literal.
base_dir = r'C:\Users\rober\Desktop\GazeDetectionStudy'

# Folder the images are read from, and the train/test output tree.
source_dir = os.path.join(base_dir, 'ClassifiedDataset')
dataset_dir = os.path.join(base_dir, 'dataset')
train_dir = os.path.join(dataset_dir, 'train')
test_dir = os.path.join(dataset_dir, 'test')

# Class labels; each one gets its own sub-folder under both splits.
classes = ['Up', 'Down', 'Center']

# Make sure every <split>/<class> output folder exists (no error if it already does).
for split_root in (train_dir, test_dir):
    for class_name in classes:
        os.makedirs(os.path.join(split_root, class_name), exist_ok=True)

# Collect the names of all image files (matched by extension, case-insensitively)
# found directly inside the source directory.
try:
    # os.listdir returns entries in arbitrary order; sorting makes everything
    # downstream independent of how the filesystem enumerates the folder.
    image_files = sorted(
        entry for entry in os.listdir(source_dir)
        if entry.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
except FileNotFoundError:
    print(f"Error: Source directory '{source_dir}' does not exist.")
    # The bare `exit` name is a helper added by the `site` module for
    # interactive sessions (and is replaced by IPython's own exit object), so
    # it is not a dependable way to stop a script. Raising SystemExit aborts
    # execution right here with exit status 1.
    raise SystemExit(1)

# Bucket every image under the first class whose name occurs in its filename.
# The check is a case-insensitive substring match, tried in the order in which
# the labels appear in `classes`; files matching no label are reported and skipped.
images_by_class = {label: [] for label in classes}

for image_name in image_files:
    lowered_name = image_name.lower()
    matched_label = next(
        (label for label in classes if label.lower() in lowered_name),
        None,
    )
    if matched_label is None:
        print(f"Warning: Could not determine class for file '{image_name}'. Skipping.")
        continue
    images_by_class[matched_label].append(image_name)

# Split each class's images into train/test subsets and copy them into place.

# Share of every class that is reserved for the test set.
TEST_FRACTION = 0.3
# Fixed seed: re-running the script reproduces the same split, so an image can
# never end up in both the train and the test folder across runs.
SPLIT_SEED = 42


def copy_images(image_list, destination_dir, cls_name):
    """Copy the named images from ``source_dir`` into ``destination_dir/cls_name``.

    Args:
        image_list: File names (not paths) located in ``source_dir``.
        destination_dir: Root folder of the split (train or test).
        cls_name: Class sub-folder inside ``destination_dir``.

    A file that cannot be copied is reported on stdout and skipped; the
    remaining files are still processed.
    """
    for img_name in image_list:
        src_path = os.path.join(source_dir, img_name)
        dst_path = os.path.join(destination_dir, cls_name, img_name)
        try:
            # copy2 also carries over file metadata such as timestamps.
            shutil.copy2(src_path, dst_path)
        except OSError as e:
            print(f"Error copying '{img_name}': {e}")


for cls in classes:
    # Work on a sorted copy: the shuffle below then starts from a well-defined
    # order and the shared lists in `images_by_class` are left untouched.
    images = sorted(images_by_class[cls])
    if not images:
        print(f"Warning: No images found for class '{cls}'. Skipping this class.")
        continue

    # A fresh, identically seeded generator per class keeps each class's split
    # independent of how many images the other classes contain.
    random.Random(SPLIT_SEED).shuffle(images)

    num_images = len(images)
    # Reserve TEST_FRACTION of the images for testing, but never fewer than
    # one; a class with a single image therefore ends up test-only.
    num_test = max(1, int(TEST_FRACTION * num_images))
    num_train = num_images - num_test

    train_images = images[:num_train]
    test_images = images[num_train:]

    print(f"Class '{cls}': {num_images} images, {len(train_images)} for training, {len(test_images)} for testing.")

    copy_images(train_images, train_dir, cls)
    copy_images(test_images, test_dir, cls)

print("Dataset organization complete.")


Class 'Up': 34852 images, 24397 for training, 10455 for testing.
Class 'Down': 35067 images, 24547 for training, 10520 for testing.
Class 'Center': 30081 images, 21057 for training, 9024 for testing.
Dataset organization complete.
